diff --git a/.do/app.yaml b/.do/app.yaml new file mode 100644 index 0000000..00d7b78 --- /dev/null +++ b/.do/app.yaml @@ -0,0 +1,19 @@ +alerts: +- rule: DEPLOYMENT_FAILED +- rule: DOMAIN_FAILED +name: crawl4ai +region: nyc +services: +- dockerfile_path: Dockerfile + github: + branch: 0.3.74 + deploy_on_push: true + repo: unclecode/crawl4ai + health_check: + http_path: /health + http_port: 11235 + instance_count: 1 + instance_size_slug: professional-xs + name: web + routes: + - path: / \ No newline at end of file diff --git a/.do/deploy.template.yaml b/.do/deploy.template.yaml new file mode 100644 index 0000000..9a06a36 --- /dev/null +++ b/.do/deploy.template.yaml @@ -0,0 +1,22 @@ +spec: + name: crawl4ai + services: + - name: crawl4ai + git: + branch: 0.3.74 + repo_clone_url: https://github.com/unclecode/crawl4ai.git + dockerfile_path: Dockerfile + http_port: 11235 + instance_count: 1 + instance_size_slug: professional-xs + health_check: + http_path: /health + envs: + - key: INSTALL_TYPE + value: "basic" + - key: PYTHON_VERSION + value: "3.10" + - key: ENABLE_GPU + value: "false" + routes: + - path: / \ No newline at end of file diff --git a/.gitignore b/.gitignore index 4c3e151..8e96fa8 100644 --- a/.gitignore +++ b/.gitignore @@ -199,6 +199,7 @@ test_env/ **/.DS_Store todo.md +todo_executor.md git_changes.py git_changes.md pypi_build.sh @@ -208,4 +209,8 @@ git_issues.md .tests/ .issues/ .docs/ -.issues/ \ No newline at end of file +.issues/ +.gitboss/ +todo_executor.md +protect-all-except-feature.sh +manage-collab.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index a377d79..8e5cc91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,557 @@ # Changelog +## [0.3.74] November 17, 2024 + +This changelog details the updates and changes introduced in Crawl4AI version 0.3.74. It's designed to inform developers about new features, modifications to existing components, removals, and other important information. + +### 1. File Download Processing + +- Users can now specify download folders using the `downloads_path` parameter in the `AsyncWebCrawler` constructor or the `arun` method. If not specified, downloads are saved to a "downloads" folder within the `.crawl4ai` directory. +- File download tracking is integrated into the `CrawlResult` object. Successfully downloaded files are listed in the `downloaded_files` attribute, providing their paths. +- Added `accept_downloads` parameter to the crawler strategies (defaults to `False`). If set to True you can add JS code and `wait_for` parameter for file download. + +**Example:** + +```python +import asyncio +import os +from pathlib import Path +from crawl4ai import AsyncWebCrawler + +async def download_example(): + downloads_path = os.path.join(Path.home(), ".crawl4ai", "downloads") + os.makedirs(downloads_path, exist_ok=True) + + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=downloads_path, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) { downloadLink.click(); } + """, + wait_for=5 # To ensure download has started + ) + + if result.downloaded_files: + print("Downloaded files:") + for file in result.downloaded_files: + print(f"- {file}") + +asyncio.run(download_example()) + +``` + +### 2. 
Refined Content Filtering + +- Introduced the `RelevanceContentFilter` strategy (and its implementation `BM25ContentFilter`) for extracting relevant content from web pages, replacing Fit Markdown and other content cleaning strategy. This new strategy leverages the BM25 algorithm to identify chunks of text relevant to the page's title, description, keywords, or a user-provided query. +- The `fit_markdown` flag in the content scraper is used to filter content based on title, meta description, and keywords. + +**Example:** + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.content_filter_strategy import BM25ContentFilter + +async def filter_content(url, query): + async with AsyncWebCrawler() as crawler: + content_filter = BM25ContentFilter(user_query=query) + result = await crawler.arun(url=url, extraction_strategy=content_filter, fit_markdown=True) + print(result.extracted_content) # Or result.fit_markdown for the markdown version + print(result.fit_html) # Or result.fit_html to show HTML with only the filtered content + +asyncio.run(filter_content("https://en.wikipedia.org/wiki/Apple", "fruit nutrition health")) +``` + +### 3. Raw HTML and Local File Support + +- Added support for crawling local files and raw HTML content directly. +- Use the `file://` prefix for local file paths. +- Use the `raw:` prefix for raw HTML strings. + +**Example:** + +```python +async def crawl_local_or_raw(crawler, content, content_type): + prefix = "file://" if content_type == "local" else "raw:" + url = f"{prefix}{content}" + result = await crawler.arun(url=url) + if result.success: + print(f"Markdown Content from {content_type.title()} Source:") + print(result.markdown) + +# Example usage with local file and raw HTML +async def main(): + async with AsyncWebCrawler() as crawler: + # Local File + await crawl_local_or_raw( + crawler, os.path.abspath('tests/async/sample_wikipedia.html'), "local" + ) + # Raw HTML + await crawl_raw_html(crawler, "
<h1>Raw Test</h1><p>This is raw HTML.</p>
") + + +asyncio.run(main()) +``` + +### 4. Browser Management + +- New asynchronous crawler strategy implemented using Playwright. +- `ManagedBrowser` class introduced for improved browser session handling, offering features like persistent browser sessions between requests (using `session_id` parameter) and browser process monitoring. +- Updated to tf-playwright-stealth for enhanced stealth capabilities. +- Added `use_managed_browser`, `use_persistent_context`, and `chrome_channel` parameters to AsyncPlaywrightCrawlerStrategy. + + +**Example:** +```python +async def browser_management_demo(): + user_data_dir = os.path.join(Path.home(), ".crawl4ai", "user-data-dir") + os.makedirs(user_data_dir, exist_ok=True) # Ensure directory exists + async with AsyncWebCrawler( + use_managed_browser=True, + user_data_dir=user_data_dir, + use_persistent_context=True, + verbose=True + ) as crawler: + result1 = await crawler.arun( + url="https://example.com", session_id="my_session" + ) + result2 = await crawler.arun( + url="https://example.com/anotherpage", session_id="my_session" + ) + +asyncio.run(browser_management_demo()) +``` + + +### 5. API Server & Cache Improvements + +- Added CORS support to API server. +- Implemented static file serving. +- Enhanced root redirect functionality. +- Cache database updated to store response headers and downloaded files information. It utilizes a file system approach to manage large content efficiently. +- New, more efficient caching database built using xxhash and file system approach. +- Introduced `CacheMode` enum (`ENABLED`, `DISABLED`, `READ_ONLY`, `WRITE_ONLY`, `BYPASS`) and `always_bypass_cache` parameter in AsyncWebCrawler for fine-grained cache control. This replaces `bypass_cache`, `no_cache_read`, `no_cache_write`, and `always_by_pass_cache`. + + +### 🗑️ Removals + +- Removed deprecated: `crawl4ai/content_cleaning_strategy.py`. +- Removed internal class ContentCleaningStrategy +- Removed legacy cache control flags: `bypass_cache`, `disable_cache`, `no_cache_read`, `no_cache_write`, and `always_by_pass_cache`. These have been superseded by `cache_mode`. + + +### ⚙️ Other Changes + +- Moved version file to `crawl4ai/__version__.py`. +- Added `crawl4ai/cache_context.py`. +- Added `crawl4ai/version_manager.py`. +- Added `crawl4ai/migrations.py`. +- Added `crawl4ai-migrate` entry point. +- Added config `NEED_MIGRATION` and `SHOW_DEPRECATION_WARNINGS`. +- API server now requires an API token for authentication, configurable with the `CRAWL4AI_API_TOKEN` environment variable. This enhances API security. +- Added synchronous crawl endpoint `/crawl_sync` for immediate result retrieval, and direct crawl endpoint `/crawl_direct` bypassing the task queue. + + +### ⚠️ Deprecation Notices + +- The synchronous version of `WebCrawler` is being phased out. While still available via `crawl4ai[sync]`, it will eventually be removed. Transition to `AsyncWebCrawler` is strongly recommended. Boolean cache control flags in `arun` are also deprecated, migrate to using the `cache_mode` parameter. See examples in the "New Features" section above for correct usage. + + +### 🐛 Bug Fixes + +- Resolved issue with browser context closing unexpectedly in Docker. This significantly improves stability, particularly within containerized environments. +- Fixed memory leaks associated with incorrect asynchronous cleanup by removing the `__del__` method and ensuring the browser context is closed explicitly using context managers. +- Improved error handling in `WebScrapingStrategy`. 
More detailed error messages and suggestions for debugging will minimize frustration when running into unexpected issues. +- Fixed issue with incorrect text parsing in specific HTML structures. + + +### Example of migrating to the new CacheMode: + +**Old way:** + +```python +crawler = AsyncWebCrawler(always_by_pass_cache=True) +result = await crawler.arun(url="https://example.com", bypass_cache=True) +``` + +**New way:** + +```python +from crawl4ai import CacheMode + +crawler = AsyncWebCrawler(always_bypass_cache=True) +result = await crawler.arun(url="https://example.com", cache_mode=CacheMode.BYPASS) +``` + + +## [0.3.74] - November 13, 2024 + +1. **File Download Processing** (Nov 14, 2024) + - Added capability for users to specify download folders + - Implemented file download tracking in crowd result object + - Created new file: `tests/async/test_async_doanloader.py` + +2. **Content Filtering Improvements** (Nov 14, 2024) + - Introduced Relevance Content Filter as an improvement over Fit Markdown + - Implemented BM25 algorithm for content relevance matching + - Added new file: `crawl4ai/content_filter_strategy.py` + - Removed deprecated: `crawl4ai/content_cleaning_strategy.py` + +3. **Local File and Raw HTML Support** (Nov 13, 2024) + - Added support for processing local files + - Implemented raw HTML input handling in AsyncWebCrawler + - Enhanced `crawl4ai/async_webcrawler.py` with significant performance improvements + +4. **Browser Management Enhancements** (Nov 12, 2024) + - Implemented new async crawler strategy using Playwright + - Introduced ManagedBrowser for better browser session handling + - Added support for persistent browser sessions + - Updated from playwright_stealth to tf-playwright-stealth + +5. **API Server Component** + - Added CORS support + - Implemented static file serving + - Enhanced root redirect functionality + + + +## [0.3.731] - November 13, 2024 + +### Added +- Support for raw HTML and local file crawling via URL prefixes ('raw:', 'file://') +- Browser process monitoring for managed browser instances +- Screenshot capability for raw HTML and local file content +- Response headers storage in cache database +- New `fit_markdown` flag for optional markdown generation + +### Changed +- Switched HTML parser from 'html.parser' to 'lxml' for ~4x performance improvement +- Optimized BeautifulSoup text conversion and element selection +- Pre-compiled regular expressions for better performance +- Improved metadata extraction efficiency +- Response headers now stored alongside HTML in cache + +### Removed +- `__del__` method from AsyncPlaywrightCrawlerStrategy to prevent async cleanup issues + +### Fixed +- Issue #256: Added support for crawling raw HTML content +- Issue #253: Implemented file:// protocol handling +- Missing response headers in cached results +- Memory leaks from improper async cleanup + +## [v0.3.731] - 2024-11-13 Changelog for Issue 256 Fix +- Fixed: Browser context unexpectedly closing in Docker environment during crawl operations. +- Removed: __del__ method from AsyncPlaywrightCrawlerStrategy to prevent unreliable asynchronous cleanup, ensuring - browser context is closed explicitly within context managers. +- Added: Monitoring for ManagedBrowser subprocess to detect and log unexpected terminations. +- Updated: Dockerfile configurations to expose debugging port (9222) and allocate additional shared memory for improved browser stability. 
+- Improved: Error handling and resource cleanup processes for browser lifecycle management within the Docker environment. + +## [v0.3.73] - 2024-11-05 + +### Major Features +- **New Doctor Feature** + - Added comprehensive system diagnostics tool + - Available through package hub and CLI + - Provides automated troubleshooting and system health checks + - Includes detailed reporting of configuration issues + +- **Dockerized API Server** + - Released complete Docker implementation for API server + - Added comprehensive documentation for Docker deployment + - Implemented container communication protocols + - Added environment configuration guides + +- **Managed Browser Integration** + - Added support for user-controlled browser instances + - Implemented `ManagedBrowser` class for better browser lifecycle management + - Added ability to connect to existing Chrome DevTools Protocol (CDP) endpoints + - Introduced user data directory support for persistent browser profiles + +- **Enhanced HTML Processing** + - Added HTML tag preservation feature during markdown conversion + - Introduced configurable tag preservation system + - Improved pre-tag and code block handling + - Added support for nested preserved tags with attribute retention + +### Improvements +- **Browser Handling** + - Added flag to ignore body visibility for problematic pages + - Improved browser process cleanup and management + - Enhanced temporary directory handling for browser profiles + - Added configurable browser launch arguments + +- **Database Management** + - Implemented connection pooling for better performance + - Added retry logic for database operations + - Improved error handling and logging + - Enhanced cleanup procedures for database connections + +- **Resource Management** + - Added memory and CPU monitoring + - Implemented dynamic task slot allocation based on system resources + - Added configurable cleanup intervals + +### Technical Improvements +- **Code Structure** + - Moved version management to dedicated _version.py file + - Improved error handling throughout the codebase + - Enhanced logging system with better error reporting + - Reorganized core components for better maintainability + +### Bug Fixes +- Fixed issues with browser process termination +- Improved handling of connection timeouts +- Enhanced error recovery in database operations +- Fixed memory leaks in long-running processes + +### Dependencies +- Updated Playwright to v1.47 +- Updated core dependencies with more flexible version constraints +- Added new development dependencies for testing + +### Breaking Changes +- Changed default browser handling behavior +- Modified database connection management approach +- Updated API response structure for better consistency + +### Migration Guide +When upgrading to v0.3.73, be aware of the following changes: + +1. Docker Deployment: + - Review Docker documentation for new deployment options + - Update environment configurations as needed + - Check container communication settings + +2. If using custom browser management: + - Update browser initialization code to use new ManagedBrowser class + - Review browser cleanup procedures + +3. For database operations: + - Check custom database queries for compatibility with new connection pooling + - Update error handling to work with new retry logic + +4. 
Using the Doctor: + - Run doctor command for system diagnostics: `crawl4ai doctor` + - Review generated reports for potential issues + - Follow recommended fixes for any identified problems + + +## [v0.3.73] - 2024-11-04 +This commit introduces several key enhancements, including improved error handling and robust database operations in `async_database.py`, which now features a connection pool and retry logic for better reliability. Updates to the README.md provide clearer instructions and a better user experience with links to documentation sections. The `.gitignore` file has been refined to include additional directories, while the async web crawler now utilizes a managed browser for more efficient crawling. Furthermore, multiple dependency updates and introduction of the `CustomHTML2Text` class enhance text extraction capabilities. + +## [v0.3.73] - 2024-10-24 + +### Added +- preserve_tags: Added support for preserving specific HTML tags during markdown conversion. +- Smart overlay removal system in AsyncPlaywrightCrawlerStrategy: + - Automatic removal of popups, modals, and cookie notices + - Detection and removal of fixed/sticky position elements + - Cleaning of empty block elements + - Configurable via `remove_overlay_elements` parameter +- Enhanced screenshot capabilities: + - Added `screenshot_wait_for` parameter to control timing + - Improved screenshot handling with existing page context + - Better error handling with fallback error images +- New URL normalization utilities: + - `normalize_url` function for consistent URL formatting + - `is_external_url` function for better link classification +- Custom base directory support for cache storage: + - New `base_directory` parameter in AsyncWebCrawler + - Allows specifying alternative locations for `.crawl4ai` folder + +### Enhanced +- Link handling improvements: + - Better duplicate link detection + - Enhanced internal/external link classification + - Improved handling of special URL protocols + - Support for anchor links and protocol-relative URLs +- Configuration refinements: + - Streamlined social media domain list + - More focused external content filtering +- LLM extraction strategy: + - Added support for separate API base URL via `api_base` parameter + - Better handling of base URLs in configuration + +### Fixed +- Screenshot functionality: + - Resolved issues with screenshot timing and context + - Improved error handling and recovery +- Link processing: + - Fixed URL normalization edge cases + - Better handling of invalid URLs + - Improved error messages for link processing failures + +### Developer Notes +- The overlay removal system uses advanced JavaScript injection for better compatibility +- URL normalization handles special cases like mailto:, tel:, and protocol-relative URLs +- Screenshot system now reuses existing page context for better performance +- Link processing maintains separate dictionaries for internal and external links to ensure uniqueness + +## [v0.3.72] - 2024-10-22 + +### Added +- New `ContentCleaningStrategy` class: + - Smart content extraction based on text density and element scoring + - Automatic removal of boilerplate content + - DOM tree analysis for better content identification + - Configurable thresholds for content detection +- Advanced proxy support: + - Added `proxy_config` option for authenticated proxy connections + - Support for username/password in proxy configuration +- New content output formats: + - `fit_markdown`: Optimized markdown output with main content focus + - `fit_html`: 
Clean HTML with only essential content + +### Enhanced +- Image source detection: + - Support for multiple image source attributes (`src`, `data-src`, `srcset`, etc.) + - Automatic fallback through potential source attributes + - Smart handling of srcset attribute +- External content handling: + - Made external link exclusion optional (disabled by default) + - Improved detection and handling of social media links + - Better control over external image filtering + +### Fixed +- Image extraction reliability with multiple source attribute checks +- External link and image handling logic for better accuracy + +### Developer Notes +- The new `ContentCleaningStrategy` uses configurable thresholds for customization +- Proxy configuration now supports more complex authentication scenarios +- Content extraction process now provides both regular and optimized outputs + +## [v0.3.72] - 2024-10-20 + +### Fixed +- Added support for parsing Base64 encoded images in WebScrapingStrategy + +### Added +- Forked and integrated a customized version of the html2text library for more control over Markdown generation +- New configuration options for controlling external content: + - Ability to exclude all external links + - Option to specify domains to exclude (default includes major social media platforms) + - Control over excluding external images + +### Changed +- Improved Markdown generation process: + - Added fine-grained control over character escaping in Markdown output + - Enhanced handling of code blocks and pre-formatted text +- Updated `AsyncPlaywrightCrawlerStrategy.close()` method to use a shorter sleep time (0.5 seconds instead of 500) +- Enhanced flexibility in `CosineStrategy` with a more generic `load_HF_embedding_model` function + +### Improved +- Optimized content scraping and processing for better efficiency +- Enhanced error handling and logging in various components + +### Developer Notes +- The customized html2text library is now located within the crawl4ai package +- New configuration options are available in the `config.py` file for external content handling +- The `WebScrapingStrategy` class has been updated to accommodate new external content exclusion options + +## [v0.3.71] - 2024-10-19 + +### Added +- New chunking strategies: + - `OverlappingWindowChunking`: Allows for overlapping chunks of text, useful for maintaining context between chunks. + - Enhanced `SlidingWindowChunking`: Improved to handle edge cases and last chunks more effectively. + +### Changed +- Updated `CHUNK_TOKEN_THRESHOLD` in config to 2048 tokens (2^11) for better compatibility with most LLM models. +- Improved `AsyncPlaywrightCrawlerStrategy.close()` method to use a shorter sleep time (0.5 seconds instead of 500), significantly reducing wait time when closing the crawler. +- Enhanced flexibility in `CosineStrategy`: + - Now uses a more generic `load_HF_embedding_model` function, allowing for easier swapping of embedding models. +- Updated `JsonCssExtractionStrategy` and `JsonXPATHExtractionStrategy` for better JSON-based extraction. + +### Fixed +- Addressed potential issues with the sliding window chunking strategy to ensure all text is properly chunked. + +### Developer Notes +- Added more comprehensive docstrings to chunking strategies for better code documentation. +- Removed hardcoded device setting in `CosineStrategy`, now using the automatically detected device. +- Added a new example in `quickstart_async.py` for generating a knowledge graph from crawled content. 
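
For illustration, a minimal sketch of how the new overlapping chunker might be used. The `window_size`/`overlap` parameter names (word counts) and the `chunk()` call are assumptions based on the strategy described above, not a confirmed API; check the signatures in your installed version.

```python
from crawl4ai.chunking_strategy import OverlappingWindowChunking

# Any long text works here; repeating a sentence keeps the sketch self-contained.
long_text = "Crawl4AI turns web pages into LLM-ready markdown. " * 200

# Assumed signature: window size and overlap expressed in words, so adjacent
# chunks share a 20-word tail/head and context is preserved across boundaries.
chunker = OverlappingWindowChunking(window_size=100, overlap=20)
chunks = chunker.chunk(long_text)

print(f"Produced {len(chunks)} chunks")
print(f"First chunk starts with: {chunks[0][:60]}")
```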
+ +These updates aim to provide more flexibility in text processing, improve performance, and enhance the overall capabilities of the crawl4ai library. The new chunking strategies, in particular, offer more options for handling large texts in various scenarios. + +## [v0.3.71] - 2024-10-18 + +### Changes +1. **Version Update**: + - Updated version number from 0.3.7 to 0.3.71. + +2. **Crawler Enhancements**: + - Added `sleep_on_close` option to AsyncPlaywrightCrawlerStrategy for delayed browser closure. + - Improved context creation with additional options: + - Enabled `accept_downloads` and `java_script_enabled`. + - Added a cookie to enable cookies by default. + +3. **Error Handling Improvements**: + - Enhanced error messages in AsyncWebCrawler's `arun` method. + - Updated error reporting format for better visibility and consistency. + +4. **Performance Optimization**: + - Commented out automatic page and context closure in `crawl` method to potentially improve performance in certain scenarios. + +### Documentation +- Updated quickstart notebook: + - Changed installation command to use the released package instead of GitHub repository. + - Updated kernel display name. + +### Developer Notes +- Minor code refactoring and cleanup. + +## [v0.3.7] - 2024-10-17 + +### New Features +1. **Enhanced Browser Stealth**: + - Implemented `playwright_stealth` for improved bot detection avoidance. + - Added `StealthConfig` for fine-tuned control over stealth parameters. + +2. **User Simulation**: + - New `simulate_user` option to mimic human-like interactions (mouse movements, clicks, keyboard presses). + +3. **Navigator Override**: + - Added `override_navigator` option to modify navigator properties, further improving bot detection evasion. + +4. **Improved iframe Handling**: + - New `process_iframes` parameter to extract and integrate iframe content into the main page. + +5. **Flexible Browser Selection**: + - Support for choosing between Chromium, Firefox, and WebKit browsers. + +6. **Include Links in Markdown**: + - Added support for including links in Markdown content, by definin g a new flag `include_links_on_markdown` in `crawl` method. + +### Improvements +1. **Better Error Handling**: + - Enhanced error reporting in WebScrapingStrategy with detailed error messages and suggestions. + - Added console message and error logging for better debugging. + +2. **Image Processing Enhancements**: + - Improved image dimension updating and filtering logic. + +3. **Crawling Flexibility**: + - Added support for custom viewport sizes. + - Implemented delayed content retrieval with `delay_before_return_html` parameter. + +4. **Performance Optimization**: + - Adjusted default semaphore count for parallel crawling. + +### Bug Fixes +- Fixed an issue where the HTML content could be empty after processing. + +### Examples +- Added new example `crawl_with_user_simulation()` demonstrating the use of user simulation and navigator override features. + +### Developer Notes +- Refactored code for better maintainability and readability. +- Updated browser launch arguments for improved compatibility and performance. + ## [v0.3.6] - 2024-10-12 ### 1. Improved Crawling Control @@ -35,43 +587,43 @@ - Allows retrieval of content after a specified delay, useful for dynamically loaded content. - **How to use**: Access `result.get_delayed_content(delay_in_seconds)` after crawling. -## Improvements and Optimizations +### Improvements and Optimizations -### 1. AsyncWebCrawler Enhancements +#### 1. 
AsyncWebCrawler Enhancements - **Flexible Initialization**: Now accepts arbitrary keyword arguments, passed directly to the crawler strategy. - Allows for more customized setups. -### 2. Image Processing Optimization -- Enhanced image handling in WebScrappingStrategy. +#### 2. Image Processing Optimization +- Enhanced image handling in WebScrapingStrategy. - Added filtering for small, invisible, or irrelevant images. - Improved image scoring system for better content relevance. - Implemented JavaScript-based image dimension updating for more accurate representation. -### 3. Database Schema Auto-updates +#### 3. Database Schema Auto-updates - Automatic database schema updates ensure compatibility with the latest version. -### 4. Enhanced Error Handling and Logging +#### 4. Enhanced Error Handling and Logging - Improved error messages and logging for easier debugging. -### 5. Content Extraction Refinements +#### 5. Content Extraction Refinements - Refined HTML sanitization process. - Improved handling of base64 encoded images. - Enhanced Markdown conversion process. - Optimized content extraction algorithms. -### 6. Utility Function Enhancements +#### 6. Utility Function Enhancements - `perform_completion_with_backoff` function now supports additional arguments for more customized API calls to LLM providers. -## Bug Fixes +### Bug Fixes - Fixed an issue where image tags were being prematurely removed during content extraction. -## Examples and Documentation +### Examples and Documentation - Updated `quickstart_async.py` with examples of: - Using custom headers in LLM extraction. - Different LLM provider usage (OpenAI, Hugging Face, Ollama). - Custom browser type usage. -## Developer Notes +### Developer Notes - Refactored code for better maintainability, flexibility, and performance. - Enhanced type hinting throughout the codebase for improved development experience. - Expanded error handling for more robust operation. 
diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..bd71dea --- /dev/null +++ b/Dockerfile @@ -0,0 +1,129 @@ +# syntax=docker/dockerfile:1.4 + +# Build arguments +ARG PYTHON_VERSION=3.10 + +# Base stage with system dependencies +FROM python:${PYTHON_VERSION}-slim as base + +# Declare ARG variables again within the build stage +ARG INSTALL_TYPE=all +ARG ENABLE_GPU=false + +# Platform-specific labels +LABEL maintainer="unclecode" +LABEL description="🔥🕷️ Crawl4AI: Open-source LLM Friendly Web Crawler & scraper" +LABEL version="1.0" + +# Environment setup +ENV PYTHONUNBUFFERED=1 \ + PYTHONDONTWRITEBYTECODE=1 \ + PIP_NO_CACHE_DIR=1 \ + PIP_DISABLE_PIP_VERSION_CHECK=1 \ + PIP_DEFAULT_TIMEOUT=100 \ + DEBIAN_FRONTEND=noninteractive + +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + build-essential \ + curl \ + wget \ + gnupg \ + git \ + cmake \ + pkg-config \ + python3-dev \ + libjpeg-dev \ + libpng-dev \ + && rm -rf /var/lib/apt/lists/* + +# Playwright system dependencies for Linux +RUN apt-get update && apt-get install -y --no-install-recommends \ + libglib2.0-0 \ + libnss3 \ + libnspr4 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libdrm2 \ + libdbus-1-3 \ + libxcb1 \ + libxkbcommon0 \ + libx11-6 \ + libxcomposite1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxrandr2 \ + libgbm1 \ + libpango-1.0-0 \ + libcairo2 \ + libasound2 \ + libatspi2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# GPU support if enabled and architecture is supported +RUN if [ "$ENABLE_GPU" = "true" ] && [ "$(dpkg --print-architecture)" != "arm64" ] ; then \ + apt-get update && apt-get install -y --no-install-recommends \ + nvidia-cuda-toolkit \ + && rm -rf /var/lib/apt/lists/* ; \ + else \ + echo "Skipping NVIDIA CUDA Toolkit installation (unsupported architecture or GPU disabled)"; \ + fi + +# Create and set working directory +WORKDIR /app + +# Copy the entire project +COPY . . + +# Install base requirements +RUN pip install --no-cache-dir -r requirements.txt + +# Install required library for FastAPI +RUN pip install fastapi uvicorn psutil + +# Install ML dependencies first for better layer caching +RUN if [ "$INSTALL_TYPE" = "all" ] ; then \ + pip install --no-cache-dir \ + torch \ + torchvision \ + torchaudio \ + scikit-learn \ + nltk \ + transformers \ + tokenizers && \ + python -m nltk.downloader punkt stopwords ; \ + fi + +# Install the package +RUN if [ "$INSTALL_TYPE" = "all" ] ; then \ + pip install ".[all]" && \ + python -m crawl4ai.model_loader ; \ + elif [ "$INSTALL_TYPE" = "torch" ] ; then \ + pip install ".[torch]" ; \ + elif [ "$INSTALL_TYPE" = "transformer" ] ; then \ + pip install ".[transformer]" && \ + python -m crawl4ai.model_loader ; \ + else \ + pip install "." ; \ + fi + + # Install MkDocs and required plugins +RUN pip install --no-cache-dir \ + mkdocs \ + mkdocs-material \ + mkdocs-terminal \ + pymdown-extensions + +# Build MkDocs documentation +RUN mkdocs build + +# Install Playwright and browsers +RUN playwright install + +# Expose port +EXPOSE 8000 11235 9222 8080 + +# Start the FastAPI server +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "11235"] \ No newline at end of file diff --git a/MISSION.md b/MISSION.md new file mode 100644 index 0000000..726f250 --- /dev/null +++ b/MISSION.md @@ -0,0 +1,46 @@ +# Mission + +![Mission Diagram](./docs/assets/pitch-dark.svg) + +### 1. The Data Capitalization Opportunity + +We live in an unprecedented era of digital wealth creation. 
Every day, individuals and enterprises generate massive amounts of valuable digital footprints across various platforms, social media channels, messenger apps, and cloud services. While people can interact with their data within these platforms, there's an immense untapped opportunity to transform this data into true capital assets. Just as physical property became a foundational element of wealth creation, personal and enterprise data has the potential to become a new form of capital on balance sheets. + +For individuals, this represents an opportunity to transform their digital activities into valuable assets. For enterprises, their internal communications, team discussions, and collaborative documents contain rich insights that could be structured and valued as intellectual capital. This wealth of information represents an unprecedented opportunity for value creation in the digital age. + +### 2. The Potential of Authentic Data + +While synthetic data has played a crucial role in AI development, there's an enormous untapped potential in the authentic data generated by individuals and organizations. Every message, document, and interaction contains unique insights and patterns that could enhance AI development. The challenge isn't a lack of data - it's that most authentic human-generated data remains inaccessible for productive use. + +By enabling willing participation in data sharing, we can unlock this vast reservoir of authentic human knowledge. This represents an opportunity to enhance AI development with diverse, real-world data that reflects the full spectrum of human experience and knowledge. + +## Our Pathway to Data Democracy + +### 1. Open-Source Foundation + +Our first step is creating an open-source data extraction engine that empowers developers and innovators to build tools for data structuring and organization. This foundation ensures transparency, security, and community-driven development. By making these tools openly available, we enable the technical infrastructure needed for true data ownership and capitalization. + +### 2. Data Capitalization Platform + +Building on this open-source foundation, we're developing a platform that helps individuals and enterprises transform their digital footprints into structured, valuable assets. This platform will provide the tools and frameworks needed to organize, understand, and value personal and organizational data as true capital assets. + +### 3. Creating a Data Marketplace + +The final piece is establishing a marketplace where individuals and organizations can willingly share their data assets. This creates opportunities for: +- Individuals to earn equity, revenue, or other forms of value from their data +- Enterprises to access diverse, high-quality data for AI development +- Researchers to work with authentic human-generated data +- Startups to build innovative solutions using real-world data + +## Economic Vision: A Shared Data Economy + +We envision a future where data becomes a fundamental asset class in a thriving shared economy. This transformation will democratize AI development by enabling willing participation in data sharing, ensuring that the benefits of AI advancement flow back to data creators. Just as property rights revolutionized economic systems, establishing data as a capital asset will create new opportunities for wealth creation and economic participation. 
+ +This shared data economy will: +- Enable individuals to capitalize on their digital footprints +- Create new revenue streams for data creators +- Provide AI developers with access to diverse, authentic data +- Foster innovation through broader access to real-world data +- Ensure more equitable distribution of AI's economic benefits + +Our vision is to facilitate this transformation from the ground up - starting with open-source tools, progressing to data capitalization platforms, and ultimately creating a thriving marketplace where data becomes a true asset class in a shared economy. This approach ensures that the future of AI is built on a foundation of authentic human knowledge, with benefits flowing back to the individuals and organizations who create and share their valuable data. \ No newline at end of file diff --git a/README.md b/README.md index a8c2b9b..fa88a50 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ -# Crawl4AI (Async Version) 🕷️🤖 +# 🔥🕷️ Crawl4AI: LLM Friendly Web Crawler & Scraper + +unclecode%2Fcrawl4ai | Trendshift [![GitHub Stars](https://img.shields.io/github/stars/unclecode/crawl4ai?style=social)](https://github.com/unclecode/crawl4ai/stargazers) +![PyPI - Downloads](https://img.shields.io/pypi/dm/Crawl4AI) [![GitHub Forks](https://img.shields.io/github/forks/unclecode/crawl4ai?style=social)](https://github.com/unclecode/crawl4ai/network/members) [![GitHub Issues](https://img.shields.io/github/issues/unclecode/crawl4ai)](https://github.com/unclecode/crawl4ai/issues) [![GitHub Pull Requests](https://img.shields.io/github/issues-pr/unclecode/crawl4ai)](https://github.com/unclecode/crawl4ai/pulls) @@ -8,20 +11,25 @@ Crawl4AI simplifies asynchronous web crawling and data extraction, making it accessible for large language models (LLMs) and AI applications. 🆓🌐 -> Looking for the synchronous version? Check out [README.sync.md](./README.sync.md). You can also access the previous version in the branch [V0.2.76](https://github.com/unclecode/crawl4ai/blob/v0.2.76). +## New in 0.3.74 ✨ + +- 🚀 **Blazing Fast Scraping**: Significantly improved scraping speed. +- 📥 **Download Manager**: Integrated file crawling, downloading, and tracking within `CrawlResult`. +- 📝 **Markdown Strategy**: Flexible system for custom markdown generation and formats. +- 🔗 **LLM-Friendly Citations**: Auto-converts links to numbered citations with reference lists. +- 🔎 **Markdown Filter**: BM25-based content extraction for cleaner, relevant markdown. +- 🖼️ **Image Extraction**: Supports `srcset`, `picture`, and responsive image formats. +- 🗂️ **Local/Raw HTML**: Crawl `file://` paths and raw HTML (`raw:`) directly. +- 🤖 **Browser Control**: Custom browser setups with stealth integration to bypass bots. +- ☁️ **API & Cache Boost**: CORS, static serving, and enhanced filesystem-based caching. +- 🐳 **API Gateway**: Run as an API service with secure token authentication. +- 🛠️ **Database Upgrades**: Optimized for larger content sets with faster caching. +- 🐛 **Bug Fixes**: Resolved browser context issues, memory leaks, and improved error handling. -## New update 0.3.6 -- 🌐 Multi-browser support (Chromium, Firefox, WebKit) -- 🖼️ Improved image processing with lazy-loading detection -- 🔧 Custom page timeout parameter for better control over crawling behavior -- 🕰️ Enhanced handling of delayed content loading -- 🔑 Custom headers support for LLM interactions -- 🖼️ iframe content extraction for comprehensive page analysis -- ⏱️ Flexible timeout and delayed content retrieval options ## Try it Now! 
-✨ Play around with this [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1REChY6fXQf-EaVYLv0eHEWvzlYxGm0pd?usp=sharing) +✨ Play around with this [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1SgRPrByQLzjRfwoRNq1wSGE9nYY_EE8C?usp=sharing) ✨ Visit our [Documentation Website](https://crawl4ai.com/mkdocs/) @@ -30,22 +38,28 @@ Crawl4AI simplifies asynchronous web crawling and data extraction, making it acc - 🆓 Completely free and open-source - 🚀 Blazing fast performance, outperforming many paid services - 🤖 LLM-friendly output formats (JSON, cleaned HTML, markdown) +- 🌐 Multi-browser support (Chromium, Firefox, WebKit) - 🌍 Supports crawling multiple URLs simultaneously - 🎨 Extracts and returns all media tags (Images, Audio, and Video) - 🔗 Extracts all external and internal links - 📚 Extracts metadata from the page -- 🔄 Custom hooks for authentication, headers, and page modifications before crawling +- 🔄 Custom hooks for authentication, headers, and page modifications - 🕵️ User-agent customization -- 🖼️ Takes screenshots of the page +- 🖼️ Takes screenshots of pages with enhanced error handling - 📜 Executes multiple custom JavaScripts before crawling - 📊 Generates structured output without LLM using JsonCssExtractionStrategy - 📚 Various chunking strategies: topic-based, regex, sentence, and more - 🧠 Advanced extraction strategies: cosine clustering, LLM, and more - 🎯 CSS selector support for precise data extraction - 📝 Passes instructions/keywords to refine extraction -- 🔒 Proxy support for enhanced privacy and access -- 🔄 Session management for complex multi-page crawling scenarios -- 🌐 Asynchronous architecture for improved performance and scalability +- 🔒 Proxy support with authentication for enhanced access +- 🔄 Session management for complex multi-page crawling +- 🌐 Asynchronous architecture for improved performance +- 🖼️ Improved image processing with lazy-loading detection +- 🕰️ Enhanced handling of delayed content loading +- 🔑 Custom headers support for LLM interactions +- 🖼️ iframe content extraction for comprehensive analysis +- ⏱️ Flexible timeout and delayed content retrieval options ## Installation 🛠️ @@ -68,11 +82,13 @@ By default, this will install the asynchronous version of Crawl4AI, using Playwr 👉 Note: When you install Crawl4AI, the setup script should automatically install and set up Playwright. However, if you encounter any Playwright-related errors, you can manually install it using one of these methods: 1. Through the command line: + ```bash playwright install ``` 2. If the above doesn't work, try this more specific command: + ```bash python -m playwright install chromium ``` @@ -97,11 +113,72 @@ cd crawl4ai pip install -e . ``` +## One-Click Deployment 🚀 + +Deploy your own instance of Crawl4AI with one click: + +[![DigitalOcean Referral Badge](https://web-platforms.sfo2.cdn.digitaloceanspaces.com/WWW/Badge%203.svg)](https://www.digitalocean.com/?repo=https://github.com/unclecode/crawl4ai/tree/0.3.74&refcode=a0780f1bdb3d&utm_campaign=Referral_Invite&utm_medium=Referral_Program&utm_source=badge) + +> 💡 **Recommended specs**: 4GB RAM minimum. Select "professional-xs" or higher when deploying for stable operation. 
+ +The deploy will: +- Set up a Docker container with Crawl4AI +- Configure Playwright and all dependencies +- Start the FastAPI server on port 11235 +- Set up health checks and auto-deployment + ### Using Docker 🐳 -We're in the process of creating Docker images and pushing them to Docker Hub. This will provide an easy way to run Crawl4AI in a containerized environment. Stay tuned for updates! +Crawl4AI is available as Docker images for easy deployment. You can either pull directly from Docker Hub (recommended) or build from the repository. + +#### Option 1: Docker Hub (Recommended) + +```bash +# Pull and run from Docker Hub (choose one): +docker pull unclecode/crawl4ai:basic # Basic crawling features +docker pull unclecode/crawl4ai:all # Full installation (ML, LLM support) +docker pull unclecode/crawl4ai:gpu # GPU-enabled version + +# Run the container +docker run -p 11235:11235 unclecode/crawl4ai:basic # Replace 'basic' with your chosen version + +# In case to allocate more shared memory for the container +docker run --shm-size=2gb -p 11235:11235 unclecode/crawl4ai:basic +``` + +#### Option 2: Build from Repository + +```bash +# Clone the repository +git clone https://github.com/unclecode/crawl4ai.git +cd crawl4ai + +# Build the image +docker build -t crawl4ai:local \ + --build-arg INSTALL_TYPE=basic \ # Options: basic, all + . + +# Run your local build +docker run -p 11235:11235 crawl4ai:local +``` + +Quick test (works for both options): +```python +import requests + +# Submit a crawl job +response = requests.post( + "http://localhost:11235/crawl", + json={"urls": "https://example.com", "priority": 10} +) +task_id = response.json()["task_id"] + +# Get results +result = requests.get(f"http://localhost:11235/task/{task_id}") +``` + +For advanced configuration, environment variables, and usage examples, see our [Docker Deployment Guide](https://crawl4ai.com/mkdocs/basic/docker-deployment/). -For more detailed installation instructions and options, please refer to our [Installation Guide](https://crawl4ai.com/mkdocs/installation). ## Quick Start 🚀 @@ -231,7 +308,7 @@ if __name__ == "__main__": asyncio.run(extract_news_teasers()) ``` -For more advanced usage examples, check out our [Examples](https://crawl4ai.com/mkdocs/full_details/advanced_jsoncss_extraction.md) section in the documentation. +For more advanced usage examples, check out our [Examples](https://crawl4ai.com/mkdocs/extraction/css-advanced/) section in the documentation. ### Extracting Structured Data with OpenAI @@ -334,7 +411,8 @@ if __name__ == "__main__": This example demonstrates Crawl4AI's ability to handle complex scenarios where content is loaded asynchronously. It crawls multiple pages of GitHub commits, executing JavaScript to load new content and using custom hooks to ensure data is loaded before proceeding. -For more advanced usage examples, check out our [Examples](https://crawl4ai.com/mkdocs/full_details/session_based_crawling.md) section in the documentation. +For more advanced usage examples, check out our [Examples](https://crawl4ai.com/mkdocs/tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites/) section in the documentation. + ## Speed Comparison 🚀 @@ -343,7 +421,7 @@ Crawl4AI is designed with speed as a primary focus. Our goal is to provide the f We've conducted a speed comparison between Crawl4AI and Firecrawl, a paid service. 
The results demonstrate Crawl4AI's superior performance: -``` +```bash Firecrawl: Time taken: 7.02 seconds Content length: 42074 characters @@ -361,6 +439,7 @@ Images found: 89 ``` As you can see, Crawl4AI outperforms Firecrawl significantly: + - Simple crawl: Crawl4AI is over 4 times faster than Firecrawl. - With JavaScript execution: Even when executing JavaScript to load more content (doubling the number of images found), Crawl4AI is still faster than Firecrawl's simple crawl. @@ -370,6 +449,30 @@ You can find the full comparison code in our repository at `docs/examples/crawl4 For detailed documentation, including installation instructions, advanced features, and API reference, visit our [Documentation Website](https://crawl4ai.com/mkdocs/). +## Crawl4AI Roadmap 🗺️ + +For detailed information on our development plans and upcoming features, check out our [Roadmap](https://github.com/unclecode/crawl4ai/blob/main/ROADMAP.md). + +### Advanced Crawling Systems 🔧 +- [x] 0. Graph Crawler: Smart website traversal using graph search algorithms for comprehensive nested page extraction +- [ ] 1. Question-Based Crawler: Natural language driven web discovery and content extraction +- [ ] 2. Knowledge-Optimal Crawler: Smart crawling that maximizes knowledge while minimizing data extraction +- [ ] 3. Agentic Crawler: Autonomous system for complex multi-step crawling operations + +### Specialized Features 🛠️ +- [ ] 4. Automated Schema Generator: Convert natural language to extraction schemas +- [ ] 5. Domain-Specific Scrapers: Pre-configured extractors for common platforms (academic, e-commerce) +- [ ] 6. Web Embedding Index: Semantic search infrastructure for crawled content + +### Development Tools 🔨 +- [ ] 7. Interactive Playground: Web UI for testing, comparing strategies with AI assistance +- [ ] 8. Performance Monitor: Real-time insights into crawler operations +- [ ] 9. Cloud Integration: One-click deployment solutions across cloud providers + +### Community & Growth 🌱 +- [ ] 10. Sponsorship Program: Structured support system with tiered benefits +- [ ] 11. Educational Content: "How to Crawl" video series and interactive tutorials + ## Contributing 🤝 We welcome contributions from the open-source community. Check out our [contribution guidelines](https://github.com/unclecode/crawl4ai/blob/main/CONTRIBUTING.md) for more information. @@ -388,6 +491,34 @@ For questions, suggestions, or feedback, feel free to reach out: Happy Crawling! 🕸️🚀 + +# Mission + +Our mission is to unlock the untapped potential of personal and enterprise data in the digital age. In today's world, individuals and organizations generate vast amounts of valuable digital footprints, yet this data remains largely uncapitalized as a true asset. + +Our open-source solution empowers developers and innovators to build tools for data extraction and structuring, laying the foundation for a new era of data ownership. By transforming personal and enterprise data into structured, tradeable assets, we're creating opportunities for individuals to capitalize on their digital footprints and for organizations to unlock the value of their collective knowledge. + +This democratization of data represents the first step toward a shared data economy, where willing participation in data sharing drives AI advancement while ensuring the benefits flow back to data creators. Through this approach, we're building a future where AI development is powered by authentic human knowledge rather than synthetic alternatives. 
+ +![Mission Diagram](./docs/assets/pitch-dark.svg) + +For a detailed exploration of our vision, opportunities, and pathway forward, please see our [full mission statement](./MISSION.md). + +## Key Opportunities + +- **Data Capitalization**: Transform digital footprints into valuable assets that can appear on personal and enterprise balance sheets +- **Authentic Data**: Unlock the vast reservoir of real human insights and knowledge for AI advancement +- **Shared Economy**: Create new value streams where data creators directly benefit from their contributions + +## Development Pathway + +1. **Open-Source Foundation**: Building transparent, community-driven data extraction tools +2. **Data Capitalization Platform**: Creating tools to structure and value digital assets +3. **Shared Data Marketplace**: Establishing an economic platform for ethical data exchange + +For a detailed exploration of our vision, challenges, and solutions, please see our [full mission statement](./MISSION.md). + + ## Star History -[![Star History Chart](https://api.star-history.com/svg?repos=unclecode/crawl4ai&type=Date)](https://star-history.com/#unclecode/crawl4ai&Date) \ No newline at end of file +[![Star History Chart](https://api.star-history.com/svg?repos=unclecode/crawl4ai&type=Date)](https://star-history.com/#unclecode/crawl4ai&Date) diff --git a/ROADMAP.md b/ROADMAP.md new file mode 100644 index 0000000..0fd784c --- /dev/null +++ b/ROADMAP.md @@ -0,0 +1,503 @@ +# Crawl4AI Strategic Roadmap + +```mermaid +%%{init: {'themeVariables': { 'fontSize': '14px'}}}%% +graph TD + subgraph A1[Advanced Crawling Systems 🔧] + A["` + • Graph Crawler ✓ + • Question-Based Crawler + • Knowledge-Optimal Crawler + • Agentic Crawler + `"] + end + + subgraph A2[Specialized Features 🛠️] + B["` + • Automated Schema Generator + • Domain-Specific Scrapers + • + • + `"] + end + + subgraph A3[Development Tools 🔨] + C["` + • Interactive Playground + • Performance Monitor + • Cloud Integration + • + `"] + end + + subgraph A4[Community & Growth 🌱] + D["` + • Sponsorship Program + • Educational Content + • + • + `"] + end + + classDef default fill:#f9f9f9,stroke:#333,stroke-width:2px + classDef section fill:#f0f0f0,stroke:#333,stroke-width:4px,rx:10 + class A1,A2,A3,A4 section + + %% Layout hints + A1 --> A2[" "] + A3 --> A4[" "] + linkStyle 0,1 stroke:none +``` + +Crawl4AI is evolving to provide more intelligent, efficient, and versatile web crawling capabilities. This roadmap outlines the key developments and features planned for the project, organized into strategic sections that build upon our current foundation. + +## 1. Advanced Crawling Systems 🔧 + +This section introduces three powerful crawling systems that extend Crawl4AI's capabilities from basic web crawling to intelligent, purpose-driven data extraction. + +### 1.1 Question-Based Crawler +The Question-Based Crawler enhances our core engine by enabling automatic discovery and extraction of relevant web content based on natural language questions. 
+ +Key Features: +- SerpiAPI integration for intelligent web search +- Relevancy scoring for search results +- Automatic URL discovery and prioritization +- Cross-source validation + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.discovery import QuestionBasedDiscovery + +async with AsyncWebCrawler() as crawler: + discovery = QuestionBasedDiscovery(crawler) + results = await discovery.arun( + question="What are the system requirements for major cloud providers' GPU instances?", + max_urls=5, + relevance_threshold=0.7 + ) + + for result in results: + print(f"Source: {result.url} (Relevance: {result.relevance_score})") + print(f"Content: {result.markdown}\n") +``` + +### 1.2 Knowledge-Optimal Crawler +An intelligent crawling system that solves the optimization problem of minimizing data extraction while maximizing knowledge acquisition for specific objectives. + +Key Features: +- Smart content prioritization +- Minimal data extraction for maximum knowledge +- Probabilistic relevance assessment +- Objective-driven crawling paths + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.optimization import KnowledgeOptimizer + +async with AsyncWebCrawler() as crawler: + optimizer = KnowledgeOptimizer( + objective="Understand GPU instance pricing and limitations across cloud providers", + required_knowledge=[ + "pricing structure", + "GPU specifications", + "usage limits", + "availability zones" + ], + confidence_threshold=0.85 + ) + + result = await crawler.arun( + urls=[ + "https://aws.amazon.com/ec2/pricing/", + "https://cloud.google.com/gpu", + "https://azure.microsoft.com/pricing/" + ], + optimizer=optimizer, + optimization_mode="minimal_extraction" + ) + + print(f"Knowledge Coverage: {result.knowledge_coverage}") + print(f"Data Efficiency: {result.efficiency_ratio}") + print(f"Extracted Content: {result.optimal_content}") +``` + +### 1.3 Agentic Crawler +An autonomous system capable of understanding complex goals and automatically planning and executing multi-step crawling operations. + +Key Features: +- Autonomous goal interpretation +- Dynamic step planning +- Interactive navigation capabilities +- Visual recognition and interaction +- Automatic error recovery + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.agents import CrawlerAgent + +async with AsyncWebCrawler() as crawler: + agent = CrawlerAgent(crawler) + + # Automatic planning and execution + result = await agent.arun( + goal="Find research papers about quantum computing published in 2023 with more than 50 citations", + auto_retry=True + ) + print("Generated Plan:", result.executed_steps) + print("Extracted Data:", result.data) + + # Using custom steps with automatic execution + result = await agent.arun( + goal="Extract conference deadlines from ML conferences", + custom_plan=[ + "Navigate to conference page", + "Find important dates section", + "Extract submission deadlines", + "Verify dates are for 2024" + ] + ) + + # Monitoring execution + print("Step Completion:", result.step_status) + print("Execution Time:", result.execution_time) + print("Success Rate:", result.success_rate) +``` + +# Section 2: Specialized Features 🛠️ + +This section introduces specialized tools and features that enhance Crawl4AI's capabilities for specific use cases and data extraction needs. + +### 2.1 Automated Schema Generator +A system that automatically generates JsonCssExtractionStrategy schemas from natural language descriptions, making structured data extraction accessible to all users. 
+ +Key Features: +- Natural language schema generation +- Automatic pattern detection +- Predefined schema templates +- Chrome extension for visual schema building + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.schema import SchemaGenerator + +# Generate schema from natural language description +generator = SchemaGenerator() +schema = await generator.generate( + url="https://news-website.com", + description="For each news article on the page, I need the headline, publication date, and main image" +) + +# Use generated schema with crawler +async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://news-website.com", + extraction_strategy=schema + ) + +# Example of generated schema: +""" +{ + "name": "News Article Extractor", + "baseSelector": "article.news-item", + "fields": [ + { + "name": "headline", + "selector": "h2.article-title", + "type": "text" + }, + { + "name": "date", + "selector": "span.publish-date", + "type": "text" + }, + { + "name": "image", + "selector": "img.article-image", + "type": "attribute", + "attribute": "src" + } + ] +} +""" +``` + +### 2.2 Domain Specific Scrapers +Specialized extraction strategies optimized for common website types and platforms, providing consistent and reliable data extraction without additional configuration. + +Key Features: +- Pre-configured extractors for popular platforms +- Academic site specialization (arXiv, NCBI) +- E-commerce standardization +- Documentation site handling + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.extractors import AcademicExtractor, EcommerceExtractor + +async with AsyncWebCrawler() as crawler: + # Academic paper extraction + papers = await crawler.arun( + url="https://arxiv.org/list/cs.AI/recent", + extractor="academic", # Built-in extractor type + site_type="arxiv", # Specific site optimization + extract_fields=[ + "title", + "authors", + "abstract", + "citations" + ] + ) + + # E-commerce product data + products = await crawler.arun( + url="https://store.example.com/products", + extractor="ecommerce", + extract_fields=[ + "name", + "price", + "availability", + "reviews" + ] + ) +``` + +### 2.3 Web Embedding Index +Creates and maintains a semantic search infrastructure for crawled content, enabling efficient retrieval and querying of web content through vector embeddings. + +Key Features: +- Automatic embedding generation +- Intelligent content chunking +- Efficient vector storage and indexing +- Semantic search capabilities + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.indexing import WebIndex + +# Initialize and build index +index = WebIndex(model="efficient-mini") + +async with AsyncWebCrawler() as crawler: + # Crawl and index content + await index.build( + urls=["https://docs.example.com"], + crawler=crawler, + options={ + "chunk_method": "semantic", + "update_policy": "incremental", + "embedding_batch_size": 100 + } + ) + + # Search through indexed content + results = await index.search( + query="How to implement OAuth authentication?", + filters={ + "content_type": "technical", + "recency": "6months" + }, + top_k=5 + ) + + # Get similar content + similar = await index.find_similar( + url="https://docs.example.com/auth/oauth", + threshold=0.85 + ) +``` + +Each of these specialized features builds upon Crawl4AI's core functionality while providing targeted solutions for specific use cases. They can be used independently or combined for more complex data extraction and processing needs. 
+ +# Section 3: Development Tools 🔧 + +This section covers tools designed to enhance the development experience, monitoring, and deployment of Crawl4AI applications. + +### 3.1 Crawl4AI Playground 🎮 + +The Crawl4AI Playground is an interactive web-based development environment that simplifies web scraping experimentation, development, and deployment. With its intuitive interface and AI-powered assistance, users can quickly prototype, test, and deploy web scraping solutions. + +#### Key Features 🌟 + +##### Visual Strategy Builder +- Interactive point-and-click interface for building extraction strategies +- Real-time preview of selected elements +- Side-by-side comparison of different extraction approaches +- Visual validation of CSS selectors and XPath queries + +##### AI Assistant Integration +- Strategy recommendations based on target website analysis +- Parameter optimization suggestions +- Best practices guidance for specific use cases +- Automated error detection and resolution +- Performance optimization tips + +##### Real-Time Testing & Validation +- Live preview of extraction results +- Side-by-side comparison of multiple strategies +- Performance metrics visualization +- Automatic validation of extracted data +- Error detection and debugging tools + +##### Project Management +- Save and organize multiple scraping projects +- Version control for configurations +- Export/import project settings +- Share configurations with team members +- Project templates for common use cases + +##### Deployment Pipeline +- One-click deployment to various environments +- Docker container generation +- Cloud deployment templates (AWS, GCP, Azure) +- Scaling configuration management +- Monitoring setup automation + + +### 3.2 Performance Monitoring System +A comprehensive monitoring solution providing real-time insights into crawler operations, resource usage, and system health through both CLI and GUI interfaces. + +Key Features: +- Real-time resource tracking +- Active crawl monitoring +- Performance statistics +- Customizable alerting system + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.monitor import CrawlMonitor + +# Initialize monitoring +monitor = CrawlMonitor() + +# Start monitoring with CLI interface +await monitor.start( + mode="cli", # or "gui" + refresh_rate="1s", + metrics={ + "resources": ["cpu", "memory", "network"], + "crawls": ["active", "queued", "completed"], + "performance": ["success_rate", "response_times"] + } +) + +# Example CLI output: +""" +Crawl4AI Monitor (Live) - Press Q to exit +──────────────────────────────────────── +System Usage: + ├─ CPU: ███████░░░ 70% + └─ Memory: ████░░░░░ 2.1GB/8GB + +Active Crawls: +ID URL Status Progress +001 docs.example.com 🟢 Active 75% +002 api.service.com 🟡 Queue - + +Metrics (Last 5min): + ├─ Success Rate: 98% + ├─ Avg Response: 0.6s + └─ Pages/sec: 8.5 +""" +``` + +### 3.3 Cloud Integration +Streamlined deployment tools for setting up Crawl4AI in various cloud environments, with support for scaling and monitoring. 
+ +Key Features: +- One-click deployment solutions +- Auto-scaling configuration +- Load balancing setup +- Cloud-specific optimizations +- Monitoring integration + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.deploy import CloudDeployer + +# Initialize deployer +deployer = CloudDeployer() + +# Deploy crawler service +deployment = await deployer.deploy( + service_name="crawler-cluster", + platform="aws", # or "gcp", "azure" + config={ + "instance_type": "compute-optimized", + "auto_scaling": { + "min_instances": 2, + "max_instances": 10, + "scale_based_on": "cpu_usage" + }, + "region": "us-east-1", + "monitoring": True + } +) + +# Get deployment status and endpoints +print(f"Service Status: {deployment.status}") +print(f"API Endpoint: {deployment.endpoint}") +print(f"Monitor URL: {deployment.monitor_url}") +``` + +These development tools work together to provide a comprehensive environment for developing, testing, monitoring, and deploying Crawl4AI applications. The Playground helps users experiment and generate optimal configurations, the Performance Monitor ensures smooth operation, and the Cloud Integration tools simplify deployment and scaling. + +# Section 4: Community & Growth 🌱 + +This section outlines initiatives designed to build and support the Crawl4AI community, provide educational resources, and ensure sustainable project growth. + +### 4.1 Sponsorship Program +A structured program to support ongoing development and maintenance of Crawl4AI while providing valuable benefits to sponsors. + +Key Features: +- Multiple sponsorship tiers +- Sponsor recognition system +- Priority support for sponsors +- Early access to new features +- Custom feature development opportunities + +Program Structure (not yet finalized): +``` +Sponsorship Tiers: + +🥉 Bronze Supporter +- GitHub Sponsor badge +- Priority issue response +- Community Discord role + +🥈 Silver Supporter +- All Bronze benefits +- Technical support channel +- Vote on roadmap priorities +- Early access to beta features + +🥇 Gold Supporter +- All Silver benefits +- Custom feature requests +- Direct developer access +- Private support sessions + +💎 Diamond Partner +- All Gold benefits +- Custom development +- On-demand consulting +- Integration support +``` + +### 4.2 "How to Crawl" Video Series +A comprehensive educational resource teaching users how to effectively use Crawl4AI for various web scraping and data extraction scenarios. + +Key Features: +- Step-by-step tutorials +- Real-world use cases +- Best practices +- Integration guides +- Advanced feature deep-dives + +These community initiatives are designed to: +- Provide comprehensive learning resources +- Foster a supportive user community +- Ensure sustainable project development +- Share knowledge and best practices +- Create opportunities for collaboration + +The combination of structured support through sponsorship, educational content through video series, and interactive learning through the playground creates a robust ecosystem for both new and experienced users of Crawl4AI. 
diff --git a/crawl4ai/__init__.py b/crawl4ai/__init__.py index 04da30f..0ccf13d 100644 --- a/crawl4ai/__init__.py +++ b/crawl4ai/__init__.py @@ -1,13 +1,15 @@ # __init__.py -from .async_webcrawler import AsyncWebCrawler -from .models import CrawlResult +from .async_webcrawler import AsyncWebCrawler, CacheMode -__version__ = "0.3.6" +from .models import CrawlResult +from .__version__ import __version__ +# __version__ = "0.3.73" __all__ = [ "AsyncWebCrawler", "CrawlResult", + "CacheMode", ] def is_sync_version_installed(): @@ -26,5 +28,5 @@ def is_sync_version_installed(): print("Warning: Failed to import WebCrawler even though selenium is installed. This might be due to other missing dependencies.") else: WebCrawler = None - import warnings - print("Warning: Synchronous WebCrawler is not available. Install crawl4ai[sync] for synchronous support. However, please note that the synchronous version will be deprecated soon.") \ No newline at end of file + # import warnings + # print("Warning: Synchronous WebCrawler is not available. Install crawl4ai[sync] for synchronous support. However, please note that the synchronous version will be deprecated soon.") \ No newline at end of file diff --git a/crawl4ai/__version__.py b/crawl4ai/__version__.py new file mode 100644 index 0000000..65ee6e7 --- /dev/null +++ b/crawl4ai/__version__.py @@ -0,0 +1,2 @@ +# crawl4ai/_version.py +__version__ = "0.3.74" \ No newline at end of file diff --git a/crawl4ai/async_crawler_strategy.py b/crawl4ai/async_crawler_strategy.py index e969995..3f332eb 100644 --- a/crawl4ai/async_crawler_strategy.py +++ b/crawl4ai/async_crawler_strategy.py @@ -1,27 +1,198 @@ import asyncio -import base64, time +import base64 +import time from abc import ABC, abstractmethod from typing import Callable, Dict, Any, List, Optional, Awaitable -import os +import os, sys, shutil +import tempfile, subprocess from playwright.async_api import async_playwright, Page, Browser, Error from io import BytesIO from PIL import Image, ImageDraw, ImageFont -from .utils import sanitize_input_encode, calculate_semaphore_count -import json, uuid -import hashlib from pathlib import Path from playwright.async_api import ProxySettings from pydantic import BaseModel +import hashlib +import json +import uuid +from .models import AsyncCrawlResponse + +from playwright_stealth import StealthConfig, stealth_async + +stealth_config = StealthConfig( + webdriver=True, + chrome_app=True, + chrome_csi=True, + chrome_load_times=True, + chrome_runtime=True, + navigator_languages=True, + navigator_plugins=True, + navigator_permissions=True, + webgl_vendor=True, + outerdimensions=True, + navigator_hardware_concurrency=True, + media_codecs=True, +) + + +class ManagedBrowser: + def __init__(self, browser_type: str = "chromium", user_data_dir: Optional[str] = None, headless: bool = False, logger = None): + self.browser_type = browser_type + self.user_data_dir = user_data_dir + self.headless = headless + self.browser_process = None + self.temp_dir = None + self.debugging_port = 9222 + self.logger = logger + self.shutting_down = False + + async def start(self) -> str: + """ + Starts the browser process and returns the CDP endpoint URL. + If user_data_dir is not provided, creates a temporary directory. 
+ """ + + # Create temp dir if needed + if not self.user_data_dir: + self.temp_dir = tempfile.mkdtemp(prefix="browser-profile-") + self.user_data_dir = self.temp_dir + + # Get browser path and args based on OS and browser type + browser_path = self._get_browser_path() + args = self._get_browser_args() + + # Start browser process + try: + self.browser_process = subprocess.Popen( + args, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + # Monitor browser process output for errors + asyncio.create_task(self._monitor_browser_process()) + await asyncio.sleep(2) # Give browser time to start + return f"http://localhost:{self.debugging_port}" + except Exception as e: + await self.cleanup() + raise Exception(f"Failed to start browser: {e}") + + async def _monitor_browser_process(self): + """Monitor the browser process for unexpected termination.""" + if self.browser_process: + try: + stdout, stderr = await asyncio.gather( + asyncio.to_thread(self.browser_process.stdout.read), + asyncio.to_thread(self.browser_process.stderr.read) + ) + + # Check shutting_down flag BEFORE logging anything + if self.browser_process.poll() is not None: + if not self.shutting_down: + self.logger.error( + message="Browser process terminated unexpectedly | Code: {code} | STDOUT: {stdout} | STDERR: {stderr}", + tag="ERROR", + params={ + "code": self.browser_process.returncode, + "stdout": stdout.decode(), + "stderr": stderr.decode() + } + ) + await self.cleanup() + else: + self.logger.info( + message="Browser process terminated normally | Code: {code}", + tag="INFO", + params={"code": self.browser_process.returncode} + ) + except Exception as e: + if not self.shutting_down: + self.logger.error( + message="Error monitoring browser process: {error}", + tag="ERROR", + params={"error": str(e)} + ) + + def _get_browser_path(self) -> str: + """Returns the browser executable path based on OS and browser type""" + if sys.platform == "darwin": # macOS + paths = { + "chromium": "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", + "firefox": "/Applications/Firefox.app/Contents/MacOS/firefox", + "webkit": "/Applications/Safari.app/Contents/MacOS/Safari" + } + elif sys.platform == "win32": # Windows + paths = { + "chromium": "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe", + "firefox": "C:\\Program Files\\Mozilla Firefox\\firefox.exe", + "webkit": None # WebKit not supported on Windows + } + else: # Linux + paths = { + "chromium": "google-chrome", + "firefox": "firefox", + "webkit": None # WebKit not supported on Linux + } + + return paths.get(self.browser_type) -class AsyncCrawlResponse(BaseModel): - html: str - response_headers: Dict[str, str] - status_code: int - screenshot: Optional[str] = None - get_delayed_content: Optional[Callable[[Optional[float]], Awaitable[str]]] = None + def _get_browser_args(self) -> List[str]: + """Returns browser-specific command line arguments""" + base_args = [self._get_browser_path()] + + if self.browser_type == "chromium": + args = [ + f"--remote-debugging-port={self.debugging_port}", + f"--user-data-dir={self.user_data_dir}", + ] + if self.headless: + args.append("--headless=new") + elif self.browser_type == "firefox": + args = [ + "--remote-debugging-port", str(self.debugging_port), + "--profile", self.user_data_dir, + ] + if self.headless: + args.append("--headless") + else: + raise NotImplementedError(f"Browser type {self.browser_type} not supported") + + return base_args + args + + async def cleanup(self): + """Cleanup browser process and temporary directory""" + 
# Set shutting_down flag BEFORE any termination actions + self.shutting_down = True + + if self.browser_process: + try: + self.browser_process.terminate() + # Wait for process to end gracefully + for _ in range(10): # 10 attempts, 100ms each + if self.browser_process.poll() is not None: + break + await asyncio.sleep(0.1) + + # Force kill if still running + if self.browser_process.poll() is None: + self.browser_process.kill() + await asyncio.sleep(0.1) # Brief wait for kill to take effect + + except Exception as e: + self.logger.error( + message="Error terminating browser: {error}", + tag="ERROR", + params={"error": str(e)} + ) + + if self.temp_dir and os.path.exists(self.temp_dir): + try: + shutil.rmtree(self.temp_dir) + except Exception as e: + self.logger.error( + message="Error removing temporary directory: {error}", + tag="ERROR", + params={"error": str(e)} + ) - class Config: - arbitrary_types_allowed = True class AsyncCrawlerStrategy(ABC): @abstractmethod @@ -33,7 +204,7 @@ async def crawl_many(self, urls: List[str], **kwargs) -> List[AsyncCrawlResponse pass @abstractmethod - async def take_screenshot(self, url: str) -> str: + async def take_screenshot(self, **kwargs) -> str: pass @abstractmethod @@ -45,19 +216,33 @@ def set_hook(self, hook_type: str, hook: Callable): pass class AsyncPlaywrightCrawlerStrategy(AsyncCrawlerStrategy): - def __init__(self, use_cached_html=False, js_code=None, **kwargs): + def __init__(self, use_cached_html=False, js_code=None, logger = None, **kwargs): + self.logger = logger self.use_cached_html = use_cached_html - self.user_agent = kwargs.get("user_agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + self.user_agent = kwargs.get( + "user_agent", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + ) self.proxy = kwargs.get("proxy") + self.proxy_config = kwargs.get("proxy_config") self.headless = kwargs.get("headless", True) - self.browser_type = kwargs.get("browser_type", "chromium") # New parameter + self.browser_type = kwargs.get("browser_type", "chromium") self.headers = kwargs.get("headers", {}) + self.cookies = kwargs.get("cookies", []) self.sessions = {} self.session_ttl = 1800 self.js_code = js_code self.verbose = kwargs.get("verbose", False) self.playwright = None self.browser = None + self.sleep_on_close = kwargs.get("sleep_on_close", False) + self.use_managed_browser = kwargs.get("use_managed_browser", False) + self.user_data_dir = kwargs.get("user_data_dir", None) + self.use_persistent_context = kwargs.get("use_persistent_context", False) + self.chrome_channel = kwargs.get("chrome_channel", "chrome") + self.managed_browser = None + self.default_context = None self.hooks = { 'on_browser_created': None, 'on_user_agent_updated': None, @@ -67,6 +252,14 @@ def __init__(self, use_cached_html=False, js_code=None, **kwargs): 'before_return_html': None, 'before_retrieve_html': None } + self.extra_args = kwargs.get("extra_args", []) + self.accept_downloads = kwargs.get("accept_downloads", False) + self.downloads_path = kwargs.get("downloads_path") + self._downloaded_files = [] # Track downloaded files for current crawl + if self.accept_downloads and not self.downloads_path: + self.downloads_path = os.path.join(os.getcwd(), "downloads") + os.makedirs(self.downloads_path, exist_ok=True) + async def __aenter__(self): await self.start() @@ -79,43 +272,143 @@ async def start(self): if self.playwright is None: 
self.playwright = await async_playwright().start() if self.browser is None: - browser_args = { - "headless": self.headless, - "args": [ - "--disable-gpu", - "--disable-dev-shm-usage", - "--disable-setuid-sandbox", - "--no-sandbox", - ] - } - - # Add proxy settings if a proxy is specified - if self.proxy: - proxy_settings = ProxySettings(server=self.proxy) - browser_args["proxy"] = proxy_settings + if self.use_managed_browser: + # Use managed browser approach + self.managed_browser = ManagedBrowser( + browser_type=self.browser_type, + user_data_dir=self.user_data_dir, + headless=self.headless, + logger=self.logger + ) + cdp_url = await self.managed_browser.start() + self.browser = await self.playwright.chromium.connect_over_cdp(cdp_url) + # Get the default context that maintains the user profile + contexts = self.browser.contexts + if contexts: + self.default_context = contexts[0] + else: + # If no default context exists, create one + self.default_context = await self.browser.new_context( + viewport={"width": 1920, "height": 1080} + ) - # Select the appropriate browser based on the browser_type - if self.browser_type == "firefox": - self.browser = await self.playwright.firefox.launch(**browser_args) - elif self.browser_type == "webkit": - self.browser = await self.playwright.webkit.launch(**browser_args) + # Set up the default context + if self.default_context: + await self.default_context.set_extra_http_headers(self.headers) + if self.cookies: + await self.default_context.add_cookies(self.cookies) + if self.accept_downloads: + await self.default_context.set_default_timeout(60000) + await self.default_context.set_default_navigation_timeout(60000) + self.default_context._impl_obj._options["accept_downloads"] = True + self.default_context._impl_obj._options["downloads_path"] = self.downloads_path + + if self.user_agent: + await self.default_context.set_extra_http_headers({ + "User-Agent": self.user_agent + }) else: - self.browser = await self.playwright.chromium.launch(**browser_args) + # Base browser arguments + browser_args = { + "headless": self.headless, + "args": [ + "--no-sandbox", + "--disable-dev-shm-usage", + "--no-first-run", + "--no-default-browser-check", + "--disable-infobars", + "--window-position=0,0", + "--ignore-certificate-errors", + "--ignore-certificate-errors-spki-list", + ] + } + + # Add channel if specified (try Chrome first) + if self.chrome_channel: + browser_args["channel"] = self.chrome_channel + + # Add extra args if provided + if self.extra_args: + browser_args["args"].extend(self.extra_args) + + # Add downloads path if downloads are enabled + if self.accept_downloads: + browser_args["downloads_path"] = self.downloads_path + + # Add proxy settings if a proxy is specified + if self.proxy: + proxy_settings = ProxySettings(server=self.proxy) + browser_args["proxy"] = proxy_settings + elif self.proxy_config: + proxy_settings = ProxySettings( + server=self.proxy_config.get("server"), + username=self.proxy_config.get("username"), + password=self.proxy_config.get("password") + ) + browser_args["proxy"] = proxy_settings + + try: + # Select the appropriate browser based on the browser_type + if self.browser_type == "firefox": + self.browser = await self.playwright.firefox.launch(**browser_args) + elif self.browser_type == "webkit": + self.browser = await self.playwright.webkit.launch(**browser_args) + else: + if self.use_persistent_context and self.user_data_dir: + self.browser = await self.playwright.chromium.launch_persistent_context( + user_data_dir=self.user_data_dir, 
+ accept_downloads=self.accept_downloads, + downloads_path=self.downloads_path if self.accept_downloads else None, + **browser_args + ) + self.default_context = self.browser + else: + self.browser = await self.playwright.chromium.launch(**browser_args) + + except Exception as e: + # Fallback to chromium if Chrome channel fails + if "chrome" in str(e) and browser_args.get("channel") == "chrome": + browser_args["channel"] = "chromium" + if self.use_persistent_context and self.user_data_dir: + self.browser = await self.playwright.chromium.launch_persistent_context( + user_data_dir=self.user_data_dir, + **browser_args + ) + self.default_context = self.browser + else: + self.browser = await self.playwright.chromium.launch(**browser_args) + else: + raise await self.execute_hook('on_browser_created', self.browser) async def close(self): + if self.sleep_on_close: + await asyncio.sleep(0.5) + + # Close all active sessions + session_ids = list(self.sessions.keys()) + for session_id in session_ids: + await self.kill_session(session_id) + if self.browser: await self.browser.close() self.browser = None + + if self.managed_browser: + await asyncio.sleep(0.5) + await self.managed_browser.cleanup() + self.managed_browser = None + if self.playwright: await self.playwright.stop() self.playwright = None - def __del__(self): - if self.browser or self.playwright: - asyncio.get_event_loop().run_until_complete(self.close()) + # Issue #256: Remove __del__ method to avoid potential issues with async cleanup + # def __del__(self): + # if self.browser or self.playwright: + # asyncio.get_event_loop().run_until_complete(self.close()) def set_hook(self, hook_type: str, hook: Callable): if hook_type in self.hooks: @@ -142,13 +435,16 @@ async def kill_session(self, session_id: str): if session_id in self.sessions: context, page, _ = self.sessions[session_id] await page.close() - await context.close() + if not self.use_managed_browser: + await context.close() del self.sessions[session_id] def _cleanup_expired_sessions(self): current_time = time.time() - expired_sessions = [sid for sid, (_, _, last_used) in self.sessions.items() - if current_time - last_used > self.session_ttl] + expired_sessions = [ + sid for sid, (_, _, last_used) in self.sessions.items() + if current_time - last_used > self.session_ttl + ] for sid in expired_sessions: asyncio.create_task(self.kill_session(sid)) @@ -188,8 +484,8 @@ async def smart_wait(self, page: Page, wait_for: str, timeout: float = 30000): return await self.csp_compliant_wait(page, f"() => {{{wait_for}}}", timeout) except Error: raise ValueError(f"Invalid wait_for parameter: '{wait_for}'. 
" - "It should be either a valid CSS selector, a JavaScript function, " - "or explicitly prefixed with 'js:' or 'css:'.") + "It should be either a valid CSS selector, a JavaScript function, " + "or explicitly prefixed with 'js:' or 'css:'.") async def csp_compliant_wait(self, page: Page, user_wait_function: str, timeout: float = 30000): wrapper_js = f""" @@ -249,44 +545,206 @@ async def process_iframes(self, page): }} """) else: - print(f"Warning: Could not access content frame for iframe {i}") + # print(f"Warning: Could not access content frame for iframe {i}") + self.logger.warning( + message="Could not access content frame for iframe {index}", + tag="SCRAPE", + params={"index": i} + ) except Exception as e: - print(f"Error processing iframe {i}: {str(e)}") + self.logger.error( + message="Error processing iframe {index}: {error}", + tag="ERROR", + params={"index": i, "error": str(e)} + ) + # print(f"Error processing iframe {i}: {str(e)}") # Return the page object - return page - + return page async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: + """ + Crawls a given URL or processes raw HTML/local file content based on the URL prefix. + + Args: + url (str): The URL to crawl. Supported prefixes: + - 'http://' or 'https://': Web URL to crawl. + - 'file://': Local file path to process. + - 'raw:': Raw HTML content to process. + **kwargs: Additional parameters: + - 'screenshot' (bool): Whether to take a screenshot. + - ... [other existing parameters] + + Returns: + AsyncCrawlResponse: The response containing HTML, headers, status code, and optional screenshot. + """ + response_headers = {} + status_code = 200 # Default to 200 for local/raw HTML + screenshot_requested = kwargs.get('screenshot', False) + screenshot_data = None + + if url.startswith(('http://', 'https://')): + # Proceed with standard web crawling + return await self._crawl_web(url, **kwargs) + + elif url.startswith('file://'): + # Process local file + local_file_path = url[7:] # Remove 'file://' prefix + if not os.path.exists(local_file_path): + raise FileNotFoundError(f"Local file not found: {local_file_path}") + with open(local_file_path, 'r', encoding='utf-8') as f: + html = f.read() + if screenshot_requested: + screenshot_data = await self._generate_screenshot_from_html(html) + return AsyncCrawlResponse( + html=html, + response_headers=response_headers, + status_code=status_code, + screenshot=screenshot_data, + get_delayed_content=None + ) + + elif url.startswith('raw:'): + # Process raw HTML content + raw_html = url[4:] # Remove 'raw:' prefix + html = raw_html + if screenshot_requested: + screenshot_data = await self._generate_screenshot_from_html(html) + return AsyncCrawlResponse( + html=html, + response_headers=response_headers, + status_code=status_code, + screenshot=screenshot_data, + get_delayed_content=None + ) + else: + raise ValueError("URL must start with 'http://', 'https://', 'file://', or 'raw:'") + + + async def _crawl_web(self, url: str, **kwargs) -> AsyncCrawlResponse: + """ + Existing web crawling logic remains unchanged. + + Args: + url (str): The web URL to crawl. + **kwargs: Additional parameters. + + Returns: + AsyncCrawlResponse: The response containing HTML, headers, status code, and optional screenshot. 
+ """ response_headers = {} status_code = None + # Reset downloaded files list for new crawl + self._downloaded_files = [] + self._cleanup_expired_sessions() session_id = kwargs.get("session_id") - if session_id: - context, page, _ = self.sessions.get(session_id, (None, None, None)) - if not context: - context = await self.browser.new_context( - user_agent=self.user_agent, - proxy={"server": self.proxy} if self.proxy else None - ) - await context.set_extra_http_headers(self.headers) - page = await context.new_page() - self.sessions[session_id] = (context, page, time.time()) + + # Handle page creation differently for managed browser + if self.use_managed_browser: + if session_id: + # Reuse existing session if available + context, page, _ = self.sessions.get(session_id, (None, None, None)) + if not page: + # Create new page in default context if session doesn't exist + page = await self.default_context.new_page() + self.sessions[session_id] = (self.default_context, page, time.time()) + else: + # Create new page in default context for non-session requests + page = await self.default_context.new_page() else: - context = await self.browser.new_context( - user_agent=self.user_agent, - proxy={"server": self.proxy} if self.proxy else None - ) - await context.set_extra_http_headers(self.headers) - page = await context.new_page() + if session_id: + context, page, _ = self.sessions.get(session_id, (None, None, None)) + if not context: + if self.use_persistent_context and self.browser_type in ["chrome", "chromium"]: + # In persistent context, browser is the context + context = self.browser + page = await context.new_page() + else: + # Normal context creation for non-persistent or non-Chrome browsers + context = await self.browser.new_context( + user_agent=self.user_agent, + viewport={"width": 1200, "height": 800}, + proxy={"server": self.proxy} if self.proxy else None, + java_script_enabled=True, + accept_downloads=self.accept_downloads, + # downloads_path=self.downloads_path if self.accept_downloads else None + ) + await context.add_cookies([{"name": "cookiesEnabled", "value": "true", "url": url}]) + if self.cookies: + await context.add_cookies(self.cookies) + await context.set_extra_http_headers(self.headers) + page = await context.new_page() + self.sessions[session_id] = (context, page, time.time()) + else: + if self.use_persistent_context and self.browser_type in ["chrome", "chromium"]: + # In persistent context, browser is the context + context = self.browser + else: + # Normal context creation + context = await self.browser.new_context( + user_agent=self.user_agent, + viewport={"width": 1920, "height": 1080}, + proxy={"server": self.proxy} if self.proxy else None, + accept_downloads=self.accept_downloads, + ) + if self.cookies: + await context.add_cookies(self.cookies) + await context.set_extra_http_headers(self.headers) + + if kwargs.get("override_navigator", False) or kwargs.get("simulate_user", False) or kwargs.get("magic", False): + # Inject scripts to override navigator properties + await context.add_init_script(""" + // Pass the Permissions Test. + const originalQuery = window.navigator.permissions.query; + window.navigator.permissions.query = (parameters) => ( + parameters.name === 'notifications' ? 
+ Promise.resolve({ state: Notification.permission }) : + originalQuery(parameters) + ); + Object.defineProperty(navigator, 'webdriver', { + get: () => undefined + }); + window.navigator.chrome = { + runtime: {}, + // Add other properties if necessary + }; + Object.defineProperty(navigator, 'plugins', { + get: () => [1, 2, 3, 4, 5], + }); + Object.defineProperty(navigator, 'languages', { + get: () => ['en-US', 'en'], + }); + Object.defineProperty(document, 'hidden', { + get: () => false + }); + Object.defineProperty(document, 'visibilityState', { + get: () => 'visible' + }); + """) + + page = await context.new_page() + if kwargs.get("magic", False): + await stealth_async(page, stealth_config) + # Add console message and error logging + if kwargs.get("log_console", False): + page.on("console", lambda msg: print(f"Console: {msg.text}")) + page.on("pageerror", lambda exc: print(f"Page Error: {exc}")) + try: - if self.verbose: - print(f"[LOG] 🕸️ Crawling {url} using AsyncPlaywrightCrawlerStrategy...") + # Set up download handling if enabled + if self.accept_downloads: + page.on("download", lambda download: asyncio.create_task(self._handle_download(download))) + + # if self.verbose: + # print(f"[LOG] 🕸️ Crawling {url} using AsyncPlaywrightCrawlerStrategy...") if self.use_cached_html: - cache_file_path = os.path.join(Path.home(), ".crawl4ai", "cache", hashlib.md5(url.encode()).hexdigest()) + cache_file_path = os.path.join( + os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai", "cache", hashlib.md5(url.encode()).hexdigest() + ) if os.path.exists(cache_file_path): html = "" with open(cache_file_path, "r") as f: @@ -296,12 +754,25 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: meta = json.load(f) response_headers = meta.get("response_headers", {}) status_code = meta.get("status_code") - response = AsyncCrawlResponse(html=html, response_headers=response_headers, status_code=status_code) + response = AsyncCrawlResponse( + html=html, response_headers=response_headers, status_code=status_code + ) return response if not kwargs.get("js_only", False): await self.execute_hook('before_goto', page) - response = await page.goto(url, wait_until="domcontentloaded", timeout=kwargs.get("page_timeout", 60000)) + + + response = await page.goto( + url, + # wait_until=kwargs.get("wait_until", ["domcontentloaded", "networkidle"]), + wait_until=kwargs.get("wait_until", "domcontentloaded"), + timeout=kwargs.get("page_timeout", 60000) + ) + + # response = await page.goto("about:blank") + # await page.evaluate(f"window.location.href = '{url}'") + await self.execute_hook('after_goto', page) # Get status code and headers @@ -311,51 +782,83 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: status_code = 200 response_headers = {} - - await page.wait_for_selector('body') + # Replace the current wait_for_selector line with this more robust check: + try: + # First wait for body to exist, regardless of visibility + await page.wait_for_selector('body', state='attached', timeout=30000) + + # Then wait for it to become visible by checking CSS + await page.wait_for_function(""" + () => { + const body = document.body; + const style = window.getComputedStyle(body); + return style.display !== 'none' && + style.visibility !== 'hidden' && + style.opacity !== '0'; + } + """, timeout=30000) + + except Error as e: + # If waiting fails, let's try to diagnose the issue + visibility_info = await page.evaluate(""" + () => { + const body = document.body; + const style = 
window.getComputedStyle(body); + return { + display: style.display, + visibility: style.visibility, + opacity: style.opacity, + hasContent: body.innerHTML.length, + classList: Array.from(body.classList) + } + } + """) + + if self.verbose: + print(f"Body visibility debug info: {visibility_info}") + + # Even if body is hidden, we might still want to proceed + if kwargs.get('ignore_body_visibility', True): + if self.verbose: + print("Proceeding despite hidden body...") + pass + else: + raise Error(f"Body element is hidden: {visibility_info}") + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") js_code = kwargs.get("js_code", kwargs.get("js", self.js_code)) if js_code: if isinstance(js_code, str): - r = await page.evaluate(js_code) + await page.evaluate(js_code) elif isinstance(js_code, list): for js in js_code: await page.evaluate(js) # await page.wait_for_timeout(100) - await page.wait_for_load_state('networkidle') - # Check for on execution even + + # Check for on execution event await self.execute_hook('on_execution_started', page) - # New code to handle the wait_for parameter - # Example usage: - # await crawler.crawl( - # url, - # js_code="// some JavaScript code", - # wait_for="""() => { - # return document.querySelector('#my-element') !== null; - # }""" - # ) - # Example of using a CSS selector: - # await crawler.crawl( - # url, - # wait_for="#my-element" - # ) + if kwargs.get("simulate_user", False) or kwargs.get("magic", False): + # Simulate user interactions + await page.mouse.move(100, 100) + await page.mouse.down() + await page.mouse.up() + await page.keyboard.press('ArrowDown') + + # Handle the wait_for parameter wait_for = kwargs.get("wait_for") if wait_for: try: await self.smart_wait(page, wait_for, timeout=kwargs.get("page_timeout", 60000)) except Exception as e: raise RuntimeError(f"Wait condition failed: {str(e)}") - - # Check if kwargs has screenshot=True then take screenshot - screenshot_data = None - if kwargs.get("screenshot"): - screenshot_data = await self.take_screenshot(url) - - # New code to update image dimensions + # if not wait_for and js_code: + # await page.wait_for_load_state('networkidle', timeout=5000) + + # Update image dimensions update_image_dimensions_js = """ () => { return new Promise((resolve) => { @@ -407,7 +910,8 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: }); // Fallback timeout of 5 seconds - setTimeout(() => resolve(), 5000); + // setTimeout(() => resolve(), 5000); + resolve(); }); } """ @@ -426,14 +930,29 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: if delay_before_return_html: await asyncio.sleep(delay_before_return_html) + # Check for remove_overlay_elements parameter + if kwargs.get("remove_overlay_elements", False): + await self.remove_overlay_elements(page) + html = await page.content() await self.execute_hook('before_return_html', page, html) + + # Check if kwargs has screenshot=True then take screenshot + screenshot_data = None + if kwargs.get("screenshot"): + # Check we have screenshot_wait_for parameter, if we have simply wait for that time + screenshot_wait_for = kwargs.get("screenshot_wait_for") + if screenshot_wait_for: + await asyncio.sleep(screenshot_wait_for) + screenshot_data = await self.take_screenshot(page) - if self.verbose: - print(f"[LOG] ✅ Crawled {url} successfully!") - + # if self.verbose: + # print(f"[LOG] ✅ Crawled {url} successfully!") + if self.use_cached_html: - cache_file_path = os.path.join(Path.home(), ".crawl4ai", "cache", 
hashlib.md5(url.encode()).hexdigest()) + cache_file_path = os.path.join( + os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai", "cache", hashlib.md5(url.encode()).hexdigest() + ) with open(cache_file_path, "w", encoding="utf-8") as f: f.write(html) # store response headers and status code in cache @@ -443,7 +962,6 @@ async def crawl(self, url: str, **kwargs) -> AsyncCrawlResponse: "status_code": status_code }, f) - async def get_delayed_content(delay: float = 5.0) -> str: if self.verbose: print(f"[LOG] Waiting for {delay} seconds before retrieving content for {url}") @@ -455,67 +973,51 @@ async def get_delayed_content(delay: float = 5.0) -> str: response_headers=response_headers, status_code=status_code, screenshot=screenshot_data, - get_delayed_content=get_delayed_content + get_delayed_content=get_delayed_content, + downloaded_files=self._downloaded_files if self._downloaded_files else None ) return response except Error as e: - raise Error(f"Failed to crawl {url}: {str(e)}") - finally: - if not session_id: - await page.close() - - # try: - # html = await _crawl() - # return sanitize_input_encode(html) - # except Error as e: - # raise Error(f"Failed to crawl {url}: {str(e)}") - # except Exception as e: - # raise Exception(f"Failed to crawl {url}: {str(e)}") - - async def execute_js(self, session_id: str, js_code: str, wait_for_js: str = None, wait_for_css: str = None) -> AsyncCrawlResponse: - """ - Execute JavaScript code in a specific session and optionally wait for a condition. - - :param session_id: The ID of the session to execute the JS code in. - :param js_code: The JavaScript code to execute. - :param wait_for_js: JavaScript condition to wait for after execution. - :param wait_for_css: CSS selector to wait for after execution. - :return: AsyncCrawlResponse containing the page's HTML and other information. - :raises ValueError: If the session does not exist. 
- """ - if not session_id: - raise ValueError("Session ID must be provided") - - if session_id not in self.sessions: - raise ValueError(f"No active session found for session ID: {session_id}") - - context, page, last_used = self.sessions[session_id] - + raise Error(f"async_crawler_strategy.py:_crawleb(): {str(e)}") + # finally: + # if not session_id: + # await page.close() + # await context.close() + + async def _handle_download(self, download): + """Handle file downloads.""" try: - await page.evaluate(js_code) - - if wait_for_js: - await page.wait_for_function(wait_for_js) - - if wait_for_css: - await page.wait_for_selector(wait_for_css) - - # Get the updated HTML content - html = await page.content() + suggested_filename = download.suggested_filename + download_path = os.path.join(self.downloads_path, suggested_filename) - # Get response headers and status code (assuming these are available) - response_headers = await page.evaluate("() => JSON.stringify(performance.getEntriesByType('resource')[0].responseHeaders)") - status_code = await page.evaluate("() => performance.getEntriesByType('resource')[0].responseStatus") - - # Update the last used time for this session - self.sessions[session_id] = (context, page, time.time()) + self.logger.info( + message="Downloading {filename} to {path}", + tag="FETCH", + params={"filename": suggested_filename, "path": download_path} + ) + + start_time = time.perf_counter() + await download.save_as(download_path) + end_time = time.perf_counter() + self._downloaded_files.append(download_path) + + self.logger.success( + message="Downloaded {filename} successfully", + tag="COMPLETE", + params={"filename": suggested_filename, "path": download_path, "duration": f"{end_time - start_time:.2f}s"} + ) + except Exception as e: + self.logger.error( + message="Failed to handle download: {error}", + tag="ERROR", + params={"error": str(e)} + ) - return AsyncCrawlResponse(html=html, response_headers=response_headers, status_code=status_code) - except Error as e: - raise Error(f"Failed to execute JavaScript or wait for condition in session {session_id}: {str(e)}") + # if self.verbose: + # print(f"[ERROR] Failed to handle download: {str(e)}") async def crawl_many(self, urls: List[str], **kwargs) -> List[AsyncCrawlResponse]: - semaphore_count = kwargs.get('semaphore_count', calculate_semaphore_count()) + semaphore_count = kwargs.get('semaphore_count', 5) # Adjust as needed semaphore = asyncio.Semaphore(semaphore_count) async def crawl_with_semaphore(url): @@ -526,27 +1028,212 @@ async def crawl_with_semaphore(url): results = await asyncio.gather(*tasks, return_exceptions=True) return [result if not isinstance(result, Exception) else str(result) for result in results] - async def take_screenshot(self, url: str, wait_time = 1000) -> str: - async with await self.browser.new_context(user_agent=self.user_agent) as context: - page = await context.new_page() - try: - await page.goto(url, wait_until="domcontentloaded", timeout=30000) - # Wait for a specified time (default is 1 second) - await page.wait_for_timeout(wait_time) - screenshot = await page.screenshot(full_page=True) - return base64.b64encode(screenshot).decode('utf-8') - except Exception as e: - error_message = f"Failed to take screenshot: {str(e)}" - print(error_message) - - # Generate an error image - img = Image.new('RGB', (800, 600), color='black') - draw = ImageDraw.Draw(img) - font = ImageFont.load_default() - draw.text((10, 10), error_message, fill=(255, 255, 255), font=font) + async def 
remove_overlay_elements(self, page: Page) -> None: + """ + Removes popup overlays, modals, cookie notices, and other intrusive elements from the page. + + Args: + page (Page): The Playwright page instance + """ + remove_overlays_js = """ + async () => { + // Function to check if element is visible + const isVisible = (elem) => { + const style = window.getComputedStyle(elem); + return style.display !== 'none' && + style.visibility !== 'hidden' && + style.opacity !== '0'; + }; + + // Common selectors for popups and overlays + const commonSelectors = [ + // Close buttons first + 'button[class*="close" i]', 'button[class*="dismiss" i]', + 'button[aria-label*="close" i]', 'button[title*="close" i]', + 'a[class*="close" i]', 'span[class*="close" i]', - buffered = BytesIO() - img.save(buffered, format="JPEG") - return base64.b64encode(buffered.getvalue()).decode('utf-8') - finally: - await page.close() \ No newline at end of file + // Cookie notices + '[class*="cookie-banner" i]', '[id*="cookie-banner" i]', + '[class*="cookie-consent" i]', '[id*="cookie-consent" i]', + + // Newsletter/subscription dialogs + '[class*="newsletter" i]', '[class*="subscribe" i]', + + // Generic popups/modals + '[class*="popup" i]', '[class*="modal" i]', + '[class*="overlay" i]', '[class*="dialog" i]', + '[role="dialog"]', '[role="alertdialog"]' + ]; + + // Try to click close buttons first + for (const selector of commonSelectors.slice(0, 6)) { + const closeButtons = document.querySelectorAll(selector); + for (const button of closeButtons) { + if (isVisible(button)) { + try { + button.click(); + await new Promise(resolve => setTimeout(resolve, 100)); + } catch (e) { + console.log('Error clicking button:', e); + } + } + } + } + + // Remove remaining overlay elements + const removeOverlays = () => { + // Find elements with high z-index + const allElements = document.querySelectorAll('*'); + for (const elem of allElements) { + const style = window.getComputedStyle(elem); + const zIndex = parseInt(style.zIndex); + const position = style.position; + + if ( + isVisible(elem) && + (zIndex > 999 || position === 'fixed' || position === 'absolute') && + ( + elem.offsetWidth > window.innerWidth * 0.5 || + elem.offsetHeight > window.innerHeight * 0.5 || + style.backgroundColor.includes('rgba') || + parseFloat(style.opacity) < 1 + ) + ) { + elem.remove(); + } + } + + // Remove elements matching common selectors + for (const selector of commonSelectors) { + const elements = document.querySelectorAll(selector); + elements.forEach(elem => { + if (isVisible(elem)) { + elem.remove(); + } + }); + } + }; + + // Remove overlay elements + removeOverlays(); + + // Remove any fixed/sticky position elements at the top/bottom + const removeFixedElements = () => { + const elements = document.querySelectorAll('*'); + elements.forEach(elem => { + const style = window.getComputedStyle(elem); + if ( + (style.position === 'fixed' || style.position === 'sticky') && + isVisible(elem) + ) { + elem.remove(); + } + }); + }; + + removeFixedElements(); + + // Remove empty block elements as: div, p, span, etc. 
+ const removeEmptyBlockElements = () => { + const blockElements = document.querySelectorAll('div, p, span, section, article, header, footer, aside, nav, main, ul, ol, li, dl, dt, dd, h1, h2, h3, h4, h5, h6'); + blockElements.forEach(elem => { + if (elem.innerText.trim() === '') { + elem.remove(); + } + }); + }; + + // Remove margin-right and padding-right from body (often added by modal scripts) + document.body.style.marginRight = '0px'; + document.body.style.paddingRight = '0px'; + document.body.style.overflow = 'auto'; + + // Wait a bit for any animations to complete + await new Promise(resolve => setTimeout(resolve, 100)); + } + """ + + try: + await page.evaluate(remove_overlays_js) + await page.wait_for_timeout(500) # Wait for any animations to complete + except Exception as e: + self.logger.warning( + message="Failed to remove overlay elements: {error}", + tag="SCRAPE", + params={"error": str(e)} + ) + # if self.verbose: + # print(f"Warning: Failed to remove overlay elements: {str(e)}") + + async def take_screenshot(self, page: Page) -> str: + """ + Takes a screenshot of the current page. + + Args: + page (Page): The Playwright page instance + + Returns: + str: Base64-encoded screenshot image + """ + try: + # The page is already loaded, just take the screenshot + screenshot = await page.screenshot(full_page=True) + return base64.b64encode(screenshot).decode('utf-8') + except Exception as e: + error_message = f"Failed to take screenshot: {str(e)}" + self.logger.error( + message="Screenshot failed: {error}", + tag="ERROR", + params={"error": error_message} + ) + + + # Generate an error image + img = Image.new('RGB', (800, 600), color='black') + draw = ImageDraw.Draw(img) + font = ImageFont.load_default() + draw.text((10, 10), error_message, fill=(255, 255, 255), font=font) + + buffered = BytesIO() + img.save(buffered, format="JPEG") + return base64.b64encode(buffered.getvalue()).decode('utf-8') + finally: + await page.close() + + async def _generate_screenshot_from_html(self, html: str) -> Optional[str]: + """ + Generates a screenshot from raw HTML content. + + Args: + html (str): The HTML content to render and capture. + + Returns: + Optional[str]: Base64-encoded screenshot image or an error image if failed. 
+ """ + try: + if not self.browser: + await self.start() + page = await self.browser.new_page() + await page.set_content(html, wait_until='networkidle') + screenshot = await page.screenshot(full_page=True) + await page.close() + return base64.b64encode(screenshot).decode('utf-8') + except Exception as e: + error_message = f"Failed to take screenshot: {str(e)}" + # print(error_message) + self.logger.error( + message="Screenshot failed: {error}", + tag="ERROR", + params={"error": error_message} + ) + + # Generate an error image + img = Image.new('RGB', (800, 600), color='black') + draw = ImageDraw.Draw(img) + font = ImageFont.load_default() + draw.text((10, 10), error_message, fill=(255, 255, 255), font=font) + + buffered = BytesIO() + img.save(buffered, format="JPEG") + return base64.b64encode(buffered.getvalue()).decode('utf-8') + diff --git a/crawl4ai/async_database.py b/crawl4ai/async_database.py index 61d98e9..3c97e7d 100644 --- a/crawl4ai/async_database.py +++ b/crawl4ai/async_database.py @@ -2,18 +2,165 @@ from pathlib import Path import aiosqlite import asyncio -from typing import Optional, Tuple +from typing import Optional, Tuple, Dict +from contextlib import asynccontextmanager +import logging +import json # Added for serialization/deserialization +from .utils import ensure_content_dirs, generate_content_hash +from .models import CrawlResult +import xxhash +import aiofiles +from .config import NEED_MIGRATION +from .version_manager import VersionManager +from .async_logger import AsyncLogger +# Set up logging +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) -DB_PATH = os.path.join(Path.home(), ".crawl4ai") +base_directory = DB_PATH = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai") os.makedirs(DB_PATH, exist_ok=True) -DB_PATH = os.path.join(DB_PATH, "crawl4ai.db") +DB_PATH = os.path.join(base_directory, "crawl4ai.db") class AsyncDatabaseManager: - def __init__(self): + def __init__(self, pool_size: int = 10, max_retries: int = 3): self.db_path = DB_PATH + self.content_paths = ensure_content_dirs(os.path.dirname(DB_PATH)) + self.pool_size = pool_size + self.max_retries = max_retries + self.connection_pool: Dict[int, aiosqlite.Connection] = {} + self.pool_lock = asyncio.Lock() + self.init_lock = asyncio.Lock() + self.connection_semaphore = asyncio.Semaphore(pool_size) + self._initialized = False + self.version_manager = VersionManager() + self.logger = AsyncLogger( + log_file=os.path.join(base_directory, ".crawl4ai", "crawler_db.log"), + verbose=False, + tag_width=10 + ) + + + async def initialize(self): + """Initialize the database and connection pool""" + try: + self.logger.info("Initializing database", tag="INIT") + # Ensure the database file exists + os.makedirs(os.path.dirname(self.db_path), exist_ok=True) + + # Check if version update is needed + needs_update = self.version_manager.needs_update() + + # Always ensure base table exists + await self.ainit_db() + + # Verify the table exists + async with aiosqlite.connect(self.db_path, timeout=30.0) as db: + async with db.execute( + "SELECT name FROM sqlite_master WHERE type='table' AND name='crawled_data'" + ) as cursor: + result = await cursor.fetchone() + if not result: + raise Exception("crawled_data table was not created") + + # If version changed or fresh install, run updates + if needs_update: + self.logger.info("New version detected, running updates", tag="INIT") + await self.update_db_schema() + from .migrations import run_migration # Import here to avoid circular 
imports + await run_migration() + self.version_manager.update_version() # Update stored version after successful migration + self.logger.success("Version update completed successfully", tag="COMPLETE") + else: + self.logger.success("Database initialization completed successfully", tag="COMPLETE") + + + except Exception as e: + self.logger.error( + message="Database initialization error: {error}", + tag="ERROR", + params={"error": str(e)} + ) + self.logger.info( + message="Database will be initialized on first use", + tag="INIT" + ) + + raise + + + async def cleanup(self): + """Cleanup connections when shutting down""" + async with self.pool_lock: + for conn in self.connection_pool.values(): + await conn.close() + self.connection_pool.clear() + + @asynccontextmanager + async def get_connection(self): + """Connection pool manager""" + if not self._initialized: + # Use an asyncio.Lock to ensure only one initialization occurs + async with self.init_lock: + if not self._initialized: + await self.initialize() + self._initialized = True + + await self.connection_semaphore.acquire() + task_id = id(asyncio.current_task()) + try: + async with self.pool_lock: + if task_id not in self.connection_pool: + conn = await aiosqlite.connect( + self.db_path, + timeout=30.0 + ) + await conn.execute('PRAGMA journal_mode = WAL') + await conn.execute('PRAGMA busy_timeout = 5000') + self.connection_pool[task_id] = conn + + yield self.connection_pool[task_id] + + except Exception as e: + self.logger.error( + message="Connection error: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) + raise + finally: + async with self.pool_lock: + if task_id in self.connection_pool: + await self.connection_pool[task_id].close() + del self.connection_pool[task_id] + self.connection_semaphore.release() + + + async def execute_with_retry(self, operation, *args): + """Execute database operations with retry logic""" + for attempt in range(self.max_retries): + try: + async with self.get_connection() as db: + result = await operation(db, *args) + await db.commit() + return result + except Exception as e: + if attempt == self.max_retries - 1: + self.logger.error( + message="Operation failed after {retries} attempts: {error}", + tag="ERROR", + force_verbose=True, + params={ + "retries": self.max_retries, + "error": str(e) + } + ) + raise + await asyncio.sleep(1 * (attempt + 1)) # Exponential backoff async def ainit_db(self): - async with aiosqlite.connect(self.db_path) as db: + """Initialize database schema""" + async with aiosqlite.connect(self.db_path, timeout=30.0) as db: await db.execute(''' CREATE TABLE IF NOT EXISTS crawled_data ( url TEXT PRIMARY KEY, @@ -25,90 +172,250 @@ async def ainit_db(self): media TEXT DEFAULT "{}", links TEXT DEFAULT "{}", metadata TEXT DEFAULT "{}", - screenshot TEXT DEFAULT "" + screenshot TEXT DEFAULT "", + response_headers TEXT DEFAULT "{}", + downloaded_files TEXT DEFAULT "{}" -- New column added ) ''') await db.commit() - await self.update_db_schema() + + async def update_db_schema(self): - async with aiosqlite.connect(self.db_path) as db: - # Check if the 'media' column exists + """Update database schema if needed""" + async with aiosqlite.connect(self.db_path, timeout=30.0) as db: cursor = await db.execute("PRAGMA table_info(crawled_data)") columns = await cursor.fetchall() column_names = [column[1] for column in columns] - if 'media' not in column_names: - await self.aalter_db_add_column('media') + # List of new columns to add + new_columns = ['media', 'links', 
'metadata', 'screenshot', 'response_headers', 'downloaded_files'] - # Check for other missing columns and add them if necessary - for column in ['links', 'metadata', 'screenshot']: + for column in new_columns: if column not in column_names: - await self.aalter_db_add_column(column) + await self.aalter_db_add_column(column, db) + await db.commit() - async def aalter_db_add_column(self, new_column: str): - try: - async with aiosqlite.connect(self.db_path) as db: - await db.execute(f'ALTER TABLE crawled_data ADD COLUMN {new_column} TEXT DEFAULT ""') - await db.commit() - print(f"Added column '{new_column}' to the database.") - except Exception as e: - print(f"Error altering database to add {new_column} column: {e}") + async def aalter_db_add_column(self, new_column: str, db): + """Add new column to the database""" + if new_column == 'response_headers': + await db.execute(f'ALTER TABLE crawled_data ADD COLUMN {new_column} TEXT DEFAULT "{{}}"') + else: + await db.execute(f'ALTER TABLE crawled_data ADD COLUMN {new_column} TEXT DEFAULT ""') + self.logger.info( + message="Added column '{column}' to the database", + tag="INIT", + params={"column": new_column} + ) + + + async def aget_cached_url(self, url: str) -> Optional[CrawlResult]: + """Retrieve cached URL data as CrawlResult""" + async def _get(db): + async with db.execute( + 'SELECT * FROM crawled_data WHERE url = ?', (url,) + ) as cursor: + row = await cursor.fetchone() + if not row: + return None + + # Get column names + columns = [description[0] for description in cursor.description] + # Create dict from row data + row_dict = dict(zip(columns, row)) + + # Load content from files using stored hashes + content_fields = { + 'html': row_dict['html'], + 'cleaned_html': row_dict['cleaned_html'], + 'markdown': row_dict['markdown'], + 'extracted_content': row_dict['extracted_content'], + 'screenshot': row_dict['screenshot'] + } + + for field, hash_value in content_fields.items(): + if hash_value: + content = await self._load_content( + hash_value, + field.split('_')[0] # Get content type from field name + ) + row_dict[field] = content or "" + else: + row_dict[field] = "" + + # Parse JSON fields + json_fields = ['media', 'links', 'metadata', 'response_headers'] + for field in json_fields: + try: + row_dict[field] = json.loads(row_dict[field]) if row_dict[field] else {} + except json.JSONDecodeError: + row_dict[field] = {} + + # Parse downloaded_files + try: + row_dict['downloaded_files'] = json.loads(row_dict['downloaded_files']) if row_dict['downloaded_files'] else [] + except json.JSONDecodeError: + row_dict['downloaded_files'] = [] + + # Remove any fields not in CrawlResult model + valid_fields = CrawlResult.__annotations__.keys() + filtered_dict = {k: v for k, v in row_dict.items() if k in valid_fields} + + return CrawlResult(**filtered_dict) - async def aget_cached_url(self, url: str) -> Optional[Tuple[str, str, str, str, str, str, str, bool, str]]: try: - async with aiosqlite.connect(self.db_path) as db: - async with db.execute('SELECT url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot FROM crawled_data WHERE url = ?', (url,)) as cursor: - return await cursor.fetchone() + return await self.execute_with_retry(_get) except Exception as e: - print(f"Error retrieving cached URL: {e}") + self.logger.error( + message="Error retrieving cached URL: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) return None - async def acache_url(self, url: str, html: str, cleaned_html: str, 
markdown: str, extracted_content: str, success: bool, media: str = "{}", links: str = "{}", metadata: str = "{}", screenshot: str = ""): + async def acache_url(self, result: CrawlResult): + """Cache CrawlResult data""" + # Store content files and get hashes + content_map = { + 'html': (result.html, 'html'), + 'cleaned_html': (result.cleaned_html or "", 'cleaned'), + 'markdown': (result.markdown or "", 'markdown'), + 'extracted_content': (result.extracted_content or "", 'extracted'), + 'screenshot': (result.screenshot or "", 'screenshots') + } + + content_hashes = {} + for field, (content, content_type) in content_map.items(): + content_hashes[field] = await self._store_content(content, content_type) + + async def _cache(db): + await db.execute(''' + INSERT INTO crawled_data ( + url, html, cleaned_html, markdown, + extracted_content, success, media, links, metadata, + screenshot, response_headers, downloaded_files + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(url) DO UPDATE SET + html = excluded.html, + cleaned_html = excluded.cleaned_html, + markdown = excluded.markdown, + extracted_content = excluded.extracted_content, + success = excluded.success, + media = excluded.media, + links = excluded.links, + metadata = excluded.metadata, + screenshot = excluded.screenshot, + response_headers = excluded.response_headers, + downloaded_files = excluded.downloaded_files + ''', ( + result.url, + content_hashes['html'], + content_hashes['cleaned_html'], + content_hashes['markdown'], + content_hashes['extracted_content'], + result.success, + json.dumps(result.media), + json.dumps(result.links), + json.dumps(result.metadata or {}), + content_hashes['screenshot'], + json.dumps(result.response_headers or {}), + json.dumps(result.downloaded_files or []) + )) + try: - async with aiosqlite.connect(self.db_path) as db: - await db.execute(''' - INSERT INTO crawled_data (url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
- ON CONFLICT(url) DO UPDATE SET - html = excluded.html, - cleaned_html = excluded.cleaned_html, - markdown = excluded.markdown, - extracted_content = excluded.extracted_content, - success = excluded.success, - media = excluded.media, - links = excluded.links, - metadata = excluded.metadata, - screenshot = excluded.screenshot - ''', (url, html, cleaned_html, markdown, extracted_content, success, media, links, metadata, screenshot)) - await db.commit() + await self.execute_with_retry(_cache) except Exception as e: - print(f"Error caching URL: {e}") + self.logger.error( + message="Error caching URL: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) + async def aget_total_count(self) -> int: + """Get total number of cached URLs""" + async def _count(db): + async with db.execute('SELECT COUNT(*) FROM crawled_data') as cursor: + result = await cursor.fetchone() + return result[0] if result else 0 + try: - async with aiosqlite.connect(self.db_path) as db: - async with db.execute('SELECT COUNT(*) FROM crawled_data') as cursor: - result = await cursor.fetchone() - return result[0] if result else 0 + return await self.execute_with_retry(_count) except Exception as e: - print(f"Error getting total count: {e}") + self.logger.error( + message="Error getting total count: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) return 0 async def aclear_db(self): + """Clear all data from the database""" + async def _clear(db): + await db.execute('DELETE FROM crawled_data') + try: - async with aiosqlite.connect(self.db_path) as db: - await db.execute('DELETE FROM crawled_data') - await db.commit() + await self.execute_with_retry(_clear) except Exception as e: - print(f"Error clearing database: {e}") + self.logger.error( + message="Error clearing database: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) async def aflush_db(self): + """Drop the entire table""" + async def _flush(db): + await db.execute('DROP TABLE IF EXISTS crawled_data') + try: - async with aiosqlite.connect(self.db_path) as db: - await db.execute('DROP TABLE IF EXISTS crawled_data') - await db.commit() + await self.execute_with_retry(_flush) except Exception as e: - print(f"Error flushing database: {e}") + self.logger.error( + message="Error flushing database: {error}", + tag="ERROR", + force_verbose=True, + params={"error": str(e)} + ) + + + async def _store_content(self, content: str, content_type: str) -> str: + """Store content in filesystem and return hash""" + if not content: + return "" + + content_hash = generate_content_hash(content) + file_path = os.path.join(self.content_paths[content_type], content_hash) + + # Only write if file doesn't exist + if not os.path.exists(file_path): + async with aiofiles.open(file_path, 'w', encoding='utf-8') as f: + await f.write(content) + + return content_hash + + async def _load_content(self, content_hash: str, content_type: str) -> Optional[str]: + """Load content from filesystem by hash""" + if not content_hash: + return None + + file_path = os.path.join(self.content_paths[content_type], content_hash) + try: + async with aiofiles.open(file_path, 'r', encoding='utf-8') as f: + return await f.read() + except: + self.logger.error( + message="Failed to load content: {file_path}", + tag="ERROR", + force_verbose=True, + params={"file_path": file_path} + ) + return None -async_db_manager = AsyncDatabaseManager() \ No newline at end of file +# Create a singleton instance +async_db_manager = AsyncDatabaseManager() diff 
--git a/crawl4ai/async_logger.py b/crawl4ai/async_logger.py new file mode 100644 index 0000000..220edd1 --- /dev/null +++ b/crawl4ai/async_logger.py @@ -0,0 +1,231 @@ +from enum import Enum +from typing import Optional, Dict, Any, Union +from colorama import Fore, Back, Style, init +import time +import os +from datetime import datetime + +class LogLevel(Enum): + DEBUG = 1 + INFO = 2 + SUCCESS = 3 + WARNING = 4 + ERROR = 5 + +class AsyncLogger: + """ + Asynchronous logger with support for colored console output and file logging. + Supports templated messages with colored components. + """ + + DEFAULT_ICONS = { + 'INIT': '→', + 'READY': '✓', + 'FETCH': '↓', + 'SCRAPE': '◆', + 'EXTRACT': '■', + 'COMPLETE': '●', + 'ERROR': '×', + 'DEBUG': '⋯', + 'INFO': 'ℹ', + 'WARNING': '⚠', + } + + DEFAULT_COLORS = { + LogLevel.DEBUG: Fore.LIGHTBLACK_EX, + LogLevel.INFO: Fore.CYAN, + LogLevel.SUCCESS: Fore.GREEN, + LogLevel.WARNING: Fore.YELLOW, + LogLevel.ERROR: Fore.RED, + } + + def __init__( + self, + log_file: Optional[str] = None, + log_level: LogLevel = LogLevel.INFO, + tag_width: int = 10, + icons: Optional[Dict[str, str]] = None, + colors: Optional[Dict[LogLevel, str]] = None, + verbose: bool = True + ): + """ + Initialize the logger. + + Args: + log_file: Optional file path for logging + log_level: Minimum log level to display + tag_width: Width for tag formatting + icons: Custom icons for different tags + colors: Custom colors for different log levels + verbose: Whether to output to console + """ + init() # Initialize colorama + self.log_file = log_file + self.log_level = log_level + self.tag_width = tag_width + self.icons = icons or self.DEFAULT_ICONS + self.colors = colors or self.DEFAULT_COLORS + self.verbose = verbose + + # Create log file directory if needed + if log_file: + os.makedirs(os.path.dirname(os.path.abspath(log_file)), exist_ok=True) + + def _format_tag(self, tag: str) -> str: + """Format a tag with consistent width.""" + return f"[{tag}]".ljust(self.tag_width, ".") + + def _get_icon(self, tag: str) -> str: + """Get the icon for a tag, defaulting to info icon if not found.""" + return self.icons.get(tag, self.icons['INFO']) + + def _write_to_file(self, message: str): + """Write a message to the log file if configured.""" + if self.log_file: + timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3] + with open(self.log_file, 'a', encoding='utf-8') as f: + # Strip ANSI color codes for file output + clean_message = message.replace(Fore.RESET, '').replace(Style.RESET_ALL, '') + for color in vars(Fore).values(): + if isinstance(color, str): + clean_message = clean_message.replace(color, '') + f.write(f"[{timestamp}] {clean_message}\n") + + def _log( + self, + level: LogLevel, + message: str, + tag: str, + params: Optional[Dict[str, Any]] = None, + colors: Optional[Dict[str, str]] = None, + base_color: Optional[str] = None, + **kwargs + ): + """ + Core logging method that handles message formatting and output. 
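        Example, a minimal illustrative call (the URL and timing values are
        placeholders):

            logger._log(
                LogLevel.INFO,
                message="Fetched {url} in {timing:.2f}s",
                tag="FETCH",
                params={"url": "https://example.com", "timing": 1.23},
                colors={"timing": Fore.YELLOW},
            )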
+ + Args: + level: Log level for this message + message: Message template string + tag: Tag for the message + params: Parameters to format into the message + colors: Color overrides for specific parameters + base_color: Base color for the entire message + """ + if level.value < self.log_level.value: + return + + # Format the message with parameters if provided + if params: + try: + # First format the message with raw parameters + formatted_message = message.format(**params) + + # Then apply colors if specified + if colors: + for key, color in colors.items(): + # Find the formatted value in the message and wrap it with color + if key in params: + value_str = str(params[key]) + formatted_message = formatted_message.replace( + value_str, + f"{color}{value_str}{Style.RESET_ALL}" + ) + + except KeyError as e: + formatted_message = f"LOGGING ERROR: Missing parameter {e} in message template" + level = LogLevel.ERROR + else: + formatted_message = message + + # Construct the full log line + color = base_color or self.colors[level] + log_line = f"{color}{self._format_tag(tag)} {self._get_icon(tag)} {formatted_message}{Style.RESET_ALL}" + + # Output to console if verbose + if self.verbose or kwargs.get("force_verbose", False): + print(log_line) + + # Write to file if configured + self._write_to_file(log_line) + + def debug(self, message: str, tag: str = "DEBUG", **kwargs): + """Log a debug message.""" + self._log(LogLevel.DEBUG, message, tag, **kwargs) + + def info(self, message: str, tag: str = "INFO", **kwargs): + """Log an info message.""" + self._log(LogLevel.INFO, message, tag, **kwargs) + + def success(self, message: str, tag: str = "SUCCESS", **kwargs): + """Log a success message.""" + self._log(LogLevel.SUCCESS, message, tag, **kwargs) + + def warning(self, message: str, tag: str = "WARNING", **kwargs): + """Log a warning message.""" + self._log(LogLevel.WARNING, message, tag, **kwargs) + + def error(self, message: str, tag: str = "ERROR", **kwargs): + """Log an error message.""" + self._log(LogLevel.ERROR, message, tag, **kwargs) + + def url_status( + self, + url: str, + success: bool, + timing: float, + tag: str = "FETCH", + url_length: int = 50 + ): + """ + Convenience method for logging URL fetch status. + + Args: + url: The URL being processed + success: Whether the operation was successful + timing: Time taken for the operation + tag: Tag for the message + url_length: Maximum length for URL in log + """ + self._log( + level=LogLevel.SUCCESS if success else LogLevel.ERROR, + message="{url:.{url_length}}... | Status: {status} | Time: {timing:.2f}s", + tag=tag, + params={ + "url": url, + "url_length": url_length, + "status": success, + "timing": timing + }, + colors={ + "status": Fore.GREEN if success else Fore.RED, + "timing": Fore.YELLOW + } + ) + + def error_status( + self, + url: str, + error: str, + tag: str = "ERROR", + url_length: int = 50 + ): + """ + Convenience method for logging error status. + + Args: + url: The URL being processed + error: Error message + tag: Tag for the message + url_length: Maximum length for URL in log + """ + self._log( + level=LogLevel.ERROR, + message="{url:.{url_length}}... 
| Error: {error}", + tag=tag, + params={ + "url": url, + "url_length": url_length, + "error": error + } + ) \ No newline at end of file diff --git a/crawl4ai/async_webcrawler.py b/crawl4ai/async_webcrawler.py index ba82d28..b8be6f3 100644 --- a/crawl4ai/async_webcrawler.py +++ b/crawl4ai/async_webcrawler.py @@ -1,35 +1,107 @@ import os import time +import warnings +from enum import Enum +from colorama import init, Fore, Back, Style from pathlib import Path -from typing import Optional +from typing import Optional, List, Union import json import asyncio -from .models import CrawlResult +from .models import CrawlResult, MarkdownGenerationResult from .async_database import async_db_manager from .chunking_strategy import * +from .content_filter_strategy import * from .extraction_strategy import * from .async_crawler_strategy import AsyncCrawlerStrategy, AsyncPlaywrightCrawlerStrategy, AsyncCrawlResponse -from .content_scrapping_strategy import WebScrappingStrategy -from .config import MIN_WORD_THRESHOLD, IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD +from .cache_context import CacheMode, CacheContext, _legacy_to_cache_mode +from .content_scraping_strategy import WebScrapingStrategy +from .async_logger import AsyncLogger + +from .config import ( + MIN_WORD_THRESHOLD, + IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD, + URL_LOG_SHORTEN_LENGTH +) from .utils import ( sanitize_input_encode, InvalidCSSSelectorError, format_html ) +from urllib.parse import urlparse +import random +from .__version__ import __version__ as crawl4ai_version class AsyncWebCrawler: + """ + Asynchronous web crawler with flexible caching capabilities. + + Migration Guide (from version X.X.X): + Old way (deprecated): + crawler = AsyncWebCrawler(always_by_pass_cache=True) + result = await crawler.arun( + url="https://example.com", + bypass_cache=True, + no_cache_read=True, + no_cache_write=False + ) + + New way (recommended): + crawler = AsyncWebCrawler(always_bypass_cache=True) + result = await crawler.arun( + url="https://example.com", + cache_mode=CacheMode.WRITE_ONLY + ) + + To disable deprecation warnings: + Pass warning=False to suppress the warning. + """ + _domain_last_hit = {} + def __init__( self, crawler_strategy: Optional[AsyncCrawlerStrategy] = None, - always_by_pass_cache: bool = False, + always_bypass_cache: bool = False, + always_by_pass_cache: Optional[bool] = None, # Deprecated parameter + base_directory: str = str(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home())), **kwargs, ): + """ + Initialize the AsyncWebCrawler. + + Args: + crawler_strategy: Strategy for crawling web pages + always_bypass_cache: Whether to always bypass cache (new parameter) + always_by_pass_cache: Deprecated, use always_bypass_cache instead + base_directory: Base directory for storing cache + """ + self.verbose = kwargs.get("verbose", False) + self.logger = AsyncLogger( + log_file=os.path.join(base_directory, ".crawl4ai", "crawler.log"), + verbose=self.verbose, + tag_width=10 + ) + self.crawler_strategy = crawler_strategy or AsyncPlaywrightCrawlerStrategy( + logger = self.logger, **kwargs ) - self.always_by_pass_cache = always_by_pass_cache - self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") + + # Handle deprecated parameter + if always_by_pass_cache is not None: + if kwargs.get("warning", True): + warnings.warn( + "'always_by_pass_cache' is deprecated and will be removed in version X.X.X. " + "Use 'always_bypass_cache' instead. 
" + "Pass warning=False to suppress this warning.", + DeprecationWarning, + stacklevel=2 + ) + self.always_bypass_cache = always_by_pass_cache + else: + self.always_bypass_cache = always_bypass_cache + + self.crawl4ai_folder = os.path.join(base_directory, ".crawl4ai") os.makedirs(self.crawl4ai_folder, exist_ok=True) os.makedirs(f"{self.crawl4ai_folder}/cache", exist_ok=True) self.ready = False @@ -44,18 +116,14 @@ async def __aexit__(self, exc_type, exc_val, exc_tb): await self.crawler_strategy.__aexit__(exc_type, exc_val, exc_tb) async def awarmup(self): - if self.verbose: - print("[LOG] 🌤️ Warming up the AsyncWebCrawler") - await async_db_manager.ainit_db() - await self.arun( - url="https://google.com/", - word_count_threshold=5, - bypass_cache=False, - verbose=False, - ) + """Initialize the crawler with warm-up sequence.""" + self.logger.info(f"Crawl4AI {crawl4ai_version}", tag="INIT") + # if self.verbose: + # print(f"{Fore.CYAN}{self.tag_format('INIT')} {self.log_icons['INIT']} Crawl4AI {crawl4ai_version}{Style.RESET_ALL}") + # print(f"{Fore.CYAN}{self.tag_format('INIT')} {self.log_icons['INIT']} Warming up AsyncWebCrawler{Style.RESET_ALL}") self.ready = True - if self.verbose: - print("[LOG] 🌞 AsyncWebCrawler is ready to crawl") + # if self.verbose: + # print(f"{Fore.GREEN}{self.tag_format('READY')} {self.log_icons['READY']} AsyncWebCrawler initialized{Style.RESET_ALL}") async def arun( self, @@ -63,14 +131,82 @@ async def arun( word_count_threshold=MIN_WORD_THRESHOLD, extraction_strategy: ExtractionStrategy = None, chunking_strategy: ChunkingStrategy = RegexChunking(), + content_filter: RelevantContentFilter = None, + cache_mode: Optional[CacheMode] = None, + # Deprecated parameters bypass_cache: bool = False, + disable_cache: bool = False, + no_cache_read: bool = False, + no_cache_write: bool = False, + # Other parameters css_selector: str = None, screenshot: bool = False, user_agent: str = None, verbose=True, **kwargs, ) -> CrawlResult: + """ + Runs the crawler for a single source: URL (web, local file, or raw HTML). + + Migration from legacy cache parameters: + Old way (deprecated): + await crawler.arun(url, bypass_cache=True, no_cache_read=True) + + New way: + await crawler.arun(url, cache_mode=CacheMode.BYPASS) + + Args: + url: The URL to crawl (http://, https://, file://, or raw:) + cache_mode: Cache behavior control (recommended) + word_count_threshold: Minimum word count threshold + extraction_strategy: Strategy for content extraction + chunking_strategy: Strategy for content chunking + css_selector: CSS selector for content extraction + screenshot: Whether to capture screenshot + user_agent: Custom user agent + verbose: Enable verbose logging + + Deprecated Args: + bypass_cache: Use cache_mode=CacheMode.BYPASS instead + disable_cache: Use cache_mode=CacheMode.DISABLED instead + no_cache_read: Use cache_mode=CacheMode.WRITE_ONLY instead + no_cache_write: Use cache_mode=CacheMode.READ_ONLY instead + + Returns: + CrawlResult: The result of crawling and processing + """ try: + # Handle deprecated parameters + if any([bypass_cache, disable_cache, no_cache_read, no_cache_write]): + if kwargs.get("warning", True): + warnings.warn( + "Cache control boolean flags are deprecated and will be removed in version X.X.X. " + "Use 'cache_mode' parameter instead. 
Examples:\n" + "- For bypass_cache=True, use cache_mode=CacheMode.BYPASS\n" + "- For disable_cache=True, use cache_mode=CacheMode.DISABLED\n" + "- For no_cache_read=True, use cache_mode=CacheMode.WRITE_ONLY\n" + "- For no_cache_write=True, use cache_mode=CacheMode.READ_ONLY\n" + "Pass warning=False to suppress this warning.", + DeprecationWarning, + stacklevel=2 + ) + + # Convert legacy parameters if cache_mode not provided + if cache_mode is None: + cache_mode = _legacy_to_cache_mode( + disable_cache=disable_cache, + bypass_cache=bypass_cache, + no_cache_read=no_cache_read, + no_cache_write=no_cache_write + ) + + # Default to ENABLED if no cache mode specified + if cache_mode is None: + cache_mode = CacheMode.ENABLED + + # Create cache context + cache_context = CacheContext(url, cache_mode, self.always_bypass_cache) + extraction_strategy = extraction_strategy or NoExtractionStrategy() extraction_strategy.verbose = verbose if not isinstance(extraction_strategy, ExtractionStrategy): @@ -81,60 +217,126 @@ async def arun( word_count_threshold = max(word_count_threshold, MIN_WORD_THRESHOLD) async_response: AsyncCrawlResponse = None - cached = None + cached_result = None screenshot_data = None extracted_content = None - if not bypass_cache and not self.always_by_pass_cache: - cached = await async_db_manager.aget_cached_url(url) - - if kwargs.get("warmup", True) and not self.ready: - return None - - if cached: - html = sanitize_input_encode(cached[1]) - extracted_content = sanitize_input_encode(cached[4]) + + start_time = time.perf_counter() + + # Try to get cached result if appropriate + if cache_context.should_read(): + cached_result = await async_db_manager.aget_cached_url(url) + + if cached_result: + html = sanitize_input_encode(cached_result.html) + extracted_content = sanitize_input_encode(cached_result.extracted_content or "") if screenshot: - screenshot_data = cached[9] + screenshot_data = cached_result.screenshot if not screenshot_data: - cached = None + cached_result = None + # if verbose: + # print(f"{Fore.BLUE}{self.tag_format('FETCH')} {self.log_icons['FETCH']} Cache hit for {cache_context.display_url} | Status: {Fore.GREEN if bool(html) else Fore.RED}{bool(html)}{Style.RESET_ALL} | Time: {time.perf_counter() - start_time:.2f}s") + self.logger.url_status( + url=cache_context.display_url, + success=bool(html), + timing=time.perf_counter() - start_time, + tag="FETCH" + ) - if not cached or not html: - t1 = time.time() + + # Fetch fresh content if needed + if not cached_result or not html: + t1 = time.perf_counter() + if user_agent: self.crawler_strategy.update_user_agent(user_agent) - async_response: AsyncCrawlResponse = await self.crawler_strategy.crawl(url, screenshot=screenshot, **kwargs) + async_response: AsyncCrawlResponse = await self.crawler_strategy.crawl( + url, + screenshot=screenshot, + **kwargs + ) html = sanitize_input_encode(async_response.html) screenshot_data = async_response.screenshot - t2 = time.time() - if verbose: - print( - f"[LOG] 🚀 Crawling done for {url}, success: {bool(html)}, time taken: {t2 - t1:.2f} seconds" - ) + t2 = time.perf_counter() + self.logger.url_status( + url=cache_context.display_url, + success=bool(html), + timing=t2 - t1, + tag="FETCH" + ) + # if verbose: + # print(f"{Fore.BLUE}{self.tag_format('FETCH')} {self.log_icons['FETCH']} Live fetch for {cache_context.display_url}... 
| Status: {Fore.GREEN if bool(html) else Fore.RED}{bool(html)}{Style.RESET_ALL} | Time: {t2 - t1:.2f}s") + # Process the HTML content crawl_result = await self.aprocess_html( - url, - html, - extracted_content, - word_count_threshold, - extraction_strategy, - chunking_strategy, - css_selector, - screenshot_data, - verbose, - bool(cached), + url=url, + html=html, + extracted_content=extracted_content, + word_count_threshold=word_count_threshold, + extraction_strategy=extraction_strategy, + chunking_strategy=chunking_strategy, + content_filter=content_filter, + css_selector=css_selector, + screenshot=screenshot_data, + verbose=verbose, + is_cached=bool(cached_result), async_response=async_response, + is_web_url=cache_context.is_web_url, + is_local_file=cache_context.is_local_file, + is_raw_html=cache_context.is_raw_html, **kwargs, ) - crawl_result.status_code = async_response.status_code if async_response else 200 - crawl_result.response_headers = async_response.response_headers if async_response else {} + + # Set response data + if async_response: + crawl_result.status_code = async_response.status_code + crawl_result.response_headers = async_response.response_headers + crawl_result.downloaded_files = async_response.downloaded_files + else: + crawl_result.status_code = 200 + crawl_result.response_headers = cached_result.response_headers if cached_result else {} + crawl_result.success = bool(html) crawl_result.session_id = kwargs.get("session_id", None) + + # if verbose: + # print(f"{Fore.GREEN}{self.tag_format('COMPLETE')} {self.log_icons['COMPLETE']} {cache_context.display_url[:URL_LOG_SHORTEN_LENGTH]}... | Status: {Fore.GREEN if crawl_result.success else Fore.RED}{crawl_result.success} | {Fore.YELLOW}Total: {time.perf_counter() - start_time:.2f}s{Style.RESET_ALL}") + self.logger.success( + message="{url:.50}... | Status: {status} | Total: {timing}", + tag="COMPLETE", + params={ + "url": cache_context.display_url, + "status": crawl_result.success, + "timing": f"{time.perf_counter() - start_time:.2f}s" + }, + colors={ + "status": Fore.GREEN if crawl_result.success else Fore.RED, + "timing": Fore.YELLOW + } + ) + + # Update cache if appropriate + if cache_context.should_write() and not bool(cached_result): + await async_db_manager.acache_url(crawl_result) + return crawl_result + except Exception as e: if not hasattr(e, "msg"): e.msg = str(e) - print(f"[ERROR] 🚫 Failed to crawl {url}, error: {e.msg}") - return CrawlResult(url=url, html="", success=False, error_message=e.msg) + # print(f"{Fore.RED}{self.tag_format('ERROR')} {self.log_icons['ERROR']} Failed to crawl {cache_context.display_url[:URL_LOG_SHORTEN_LENGTH]}... 
| {e.msg}{Style.RESET_ALL}") + self.logger.error_status( + url=cache_context.display_url, + error=e.msg, + tag="ERROR" + ) + return CrawlResult( + url=url, + html="", + markdown=f"[ERROR] 🚫 arun(): Failed to crawl {cache_context.display_url}, error: {e.msg}", + success=False, + error_message=e.msg + ) async def arun_many( self, @@ -142,6 +344,9 @@ async def arun_many( word_count_threshold=MIN_WORD_THRESHOLD, extraction_strategy: ExtractionStrategy = None, chunking_strategy: ChunkingStrategy = RegexChunking(), + content_filter: RelevantContentFilter = None, + cache_mode: Optional[CacheMode] = None, + # Deprecated parameters bypass_cache: bool = False, css_selector: str = None, screenshot: bool = False, @@ -149,22 +354,101 @@ async def arun_many( verbose=True, **kwargs, ) -> List[CrawlResult]: - tasks = [ - self.arun( - url, - word_count_threshold, - extraction_strategy, - chunking_strategy, - bypass_cache, - css_selector, - screenshot, - user_agent, - verbose, - **kwargs - ) - for url in urls - ] - return await asyncio.gather(*tasks) + """ + Runs the crawler for multiple URLs concurrently. + + Migration from legacy parameters: + Old way (deprecated): + results = await crawler.arun_many(urls, bypass_cache=True) + + New way: + results = await crawler.arun_many(urls, cache_mode=CacheMode.BYPASS) + + Args: + urls: List of URLs to crawl + cache_mode: Cache behavior control (recommended) + [other parameters same as arun()] + + Returns: + List[CrawlResult]: Results for each URL + """ + if bypass_cache: + if kwargs.get("warning", True): + warnings.warn( + "'bypass_cache' is deprecated and will be removed in version X.X.X. " + "Use 'cache_mode=CacheMode.BYPASS' instead. " + "Pass warning=False to suppress this warning.", + DeprecationWarning, + stacklevel=2 + ) + if cache_mode is None: + cache_mode = CacheMode.BYPASS + + semaphore_count = kwargs.get('semaphore_count', 10) + semaphore = asyncio.Semaphore(semaphore_count) + + async def crawl_with_semaphore(url): + domain = urlparse(url).netloc + current_time = time.time() + + # print(f"{Fore.LIGHTBLACK_EX}{self.tag_format('PARALLEL')} Started task for {url[:50]}...{Style.RESET_ALL}") + self.logger.debug( + message="Started task for {url:.50}...", + tag="PARALLEL", + params={"url": url} + ) + + # Get delay settings from kwargs or use defaults + mean_delay = kwargs.get('mean_delay', 0.1) # 0.5 seconds default mean delay + max_range = kwargs.get('max_range', 0.3) # 1 seconds default max additional delay + + # Check if we need to wait + if domain in self._domain_last_hit: + time_since_last = current_time - self._domain_last_hit[domain] + if time_since_last < mean_delay: + delay = mean_delay + random.uniform(0, max_range) + await asyncio.sleep(delay) + + # Update last hit time + self._domain_last_hit[domain] = current_time + + async with semaphore: + return await self.arun( + url, + word_count_threshold=word_count_threshold, + extraction_strategy=extraction_strategy, + chunking_strategy=chunking_strategy, + content_filter=content_filter, + cache_mode=cache_mode, + css_selector=css_selector, + screenshot=screenshot, + user_agent=user_agent, + verbose=verbose, + **kwargs, + ) + + # Print start message + # print(f"{Fore.CYAN}{self.tag_format('INIT')} {self.log_icons['INIT']} Starting concurrent crawling for {len(urls)} URLs...{Style.RESET_ALL}") + self.logger.info( + message="Starting concurrent crawling for {count} URLs...", + tag="INIT", + params={"count": len(urls)} + ) + start_time = time.perf_counter() + tasks = [crawl_with_semaphore(url) for url in 
urls] + results = await asyncio.gather(*tasks, return_exceptions=True) + end_time = time.perf_counter() + # print(f"{Fore.YELLOW}{self.tag_format('COMPLETE')} {self.log_icons['COMPLETE']} Concurrent crawling completed for {len(urls)} URLs | Total time: {end_time - start_time:.2f}s{Style.RESET_ALL}") + self.logger.success( + message="Concurrent crawling completed for {count} URLs | " + Fore.YELLOW + " Total time: {timing}" + Style.RESET_ALL, + tag="COMPLETE", + params={ + "count": len(urls), + "timing": f"{end_time - start_time:.2f}s" + }, + colors={"timing": Fore.YELLOW} + ) + return [result if not isinstance(result, Exception) else str(result) for result in results] async def aprocess_html( @@ -175,31 +459,30 @@ async def aprocess_html( word_count_threshold: int, extraction_strategy: ExtractionStrategy, chunking_strategy: ChunkingStrategy, + content_filter: RelevantContentFilter, css_selector: str, screenshot: str, verbose: bool, - is_cached: bool, **kwargs, ) -> CrawlResult: - t = time.time() # Extract content from HTML try: - t1 = time.time() - scrapping_strategy = WebScrappingStrategy() - result = await scrapping_strategy.ascrap( + _url = url if not kwargs.get("is_raw_html", False) else "Raw HTML" + t1 = time.perf_counter() + scrapping_strategy = WebScrapingStrategy() + # result = await scrapping_strategy.ascrap( + result = scrapping_strategy.scrap( url, html, word_count_threshold=word_count_threshold, css_selector=css_selector, - only_text=kwargs.get("only_text", False), - image_description_min_word_threshold=kwargs.get( + only_text=kwargs.pop("only_text", False), + image_description_min_word_threshold=kwargs.pop( "image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD ), + content_filter = content_filter, + **kwargs, ) - if verbose: - print( - f"[LOG] 🚀 Content extracted for {url}, success: True, time taken: {time.time() - t1:.2f} seconds" - ) if result is None: raise ValueError(f"Process HTML, Failed to extract content from the website: {url}") @@ -208,18 +491,30 @@ async def aprocess_html( except Exception as e: raise ValueError(f"Process HTML, Failed to extract content from the website: {url}, error: {str(e)}") + markdown_v2: MarkdownGenerationResult = result.get("markdown_v2", None) + cleaned_html = sanitize_input_encode(result.get("cleaned_html", "")) markdown = sanitize_input_encode(result.get("markdown", "")) + fit_markdown = sanitize_input_encode(result.get("fit_markdown", "")) + fit_html = sanitize_input_encode(result.get("fit_html", "")) media = result.get("media", []) links = result.get("links", []) metadata = result.get("metadata", {}) + + # if verbose: + # print(f"{Fore.MAGENTA}{self.tag_format('SCRAPE')} {self.log_icons['SCRAPE']} Processed {_url[:URL_LOG_SHORTEN_LENGTH]}...{Style.RESET_ALL} | Time: {int((time.perf_counter() - t1) * 1000)}ms") + self.logger.info( + message="Processed {url:.50}... 
| Time: {timing}ms", + tag="SCRAPE", + params={ + "url": _url, + "timing": int((time.perf_counter() - t1) * 1000) + } + ) - if extracted_content is None and extraction_strategy and chunking_strategy: - if verbose: - print( - f"[LOG] 🔥 Extracting semantic blocks for {url}, Strategy: {self.__class__.__name__}" - ) + if extracted_content is None and extraction_strategy and chunking_strategy and not isinstance(extraction_strategy, NoExtractionStrategy): + t1 = time.perf_counter() # Check if extraction strategy is type of JsonCssExtractionStrategy if isinstance(extraction_strategy, JsonCssExtractionStrategy) or isinstance(extraction_strategy, JsonCssExtractionStrategy): extraction_strategy.verbose = verbose @@ -229,33 +524,30 @@ async def aprocess_html( sections = chunking_strategy.chunk(markdown) extracted_content = extraction_strategy.run(url, sections) extracted_content = json.dumps(extracted_content, indent=4, default=str, ensure_ascii=False) - - if verbose: - print( - f"[LOG] 🚀 Extraction done for {url}, time taken: {time.time() - t:.2f} seconds." + # if verbose: + # print(f"{Fore.YELLOW}{self.tag_format('EXTRACT')} {self.log_icons['EXTRACT']} Completed for {_url[:URL_LOG_SHORTEN_LENGTH]}...{Style.RESET_ALL} | Time: {time.perf_counter() - t1:.2f}s{Style.RESET_ALL}") + self.logger.info( + message="Completed for {url:.50}... | Time: {timing}s", + tag="EXTRACT", + params={ + "url": _url, + "timing": time.perf_counter() - t1 + } ) + - screenshot = None if not screenshot else screenshot - - if not is_cached: - await async_db_manager.acache_url( - url, - html, - cleaned_html, - markdown, - extracted_content, - True, - json.dumps(media), - json.dumps(links), - json.dumps(metadata), - screenshot=screenshot, - ) + + screenshot = None if not screenshot else screenshot + return CrawlResult( url=url, html=html, cleaned_html=format_html(cleaned_html), + markdown_v2=markdown_v2, markdown=markdown, + fit_markdown=fit_markdown, + fit_html= fit_html, media=media, links=links, metadata=metadata, @@ -266,10 +558,15 @@ async def aprocess_html( ) async def aclear_cache(self): - await async_db_manager.aclear_db() + """Clear the cache database.""" + await async_db_manager.cleanup() async def aflush_cache(self): + """Flush the cache database.""" await async_db_manager.aflush_db() async def aget_cache_size(self): + """Get the total number of cached items.""" return await async_db_manager.aget_total_count() + + diff --git a/crawl4ai/cache_context.py b/crawl4ai/cache_context.py new file mode 100644 index 0000000..429eacc --- /dev/null +++ b/crawl4ai/cache_context.py @@ -0,0 +1,79 @@ +from enum import Enum + + +class CacheMode(Enum): + """ + Defines the caching behavior for web crawling operations. + + Modes: + - ENABLED: Normal caching behavior (read and write) + - DISABLED: No caching at all + - READ_ONLY: Only read from cache, don't write + - WRITE_ONLY: Only write to cache, don't read + - BYPASS: Bypass cache for this operation + """ + ENABLED = "enabled" + DISABLED = "disabled" + READ_ONLY = "read_only" + WRITE_ONLY = "write_only" + BYPASS = "bypass" + + +class CacheContext: + """ + Encapsulates cache-related decisions and URL handling. + + This class centralizes all cache-related logic and URL type checking, + making the caching behavior more predictable and maintainable. 
+ """ + def __init__(self, url: str, cache_mode: CacheMode, always_bypass: bool = False): + self.url = url + self.cache_mode = cache_mode + self.always_bypass = always_bypass + self.is_cacheable = url.startswith(('http://', 'https://', 'file://')) + self.is_web_url = url.startswith(('http://', 'https://')) + self.is_local_file = url.startswith("file://") + self.is_raw_html = url.startswith("raw:") + self._url_display = url if not self.is_raw_html else "Raw HTML" + + def should_read(self) -> bool: + """Determines if cache should be read based on context.""" + if self.always_bypass or not self.is_cacheable: + return False + return self.cache_mode in [CacheMode.ENABLED, CacheMode.READ_ONLY] + + def should_write(self) -> bool: + """Determines if cache should be written based on context.""" + if self.always_bypass or not self.is_cacheable: + return False + return self.cache_mode in [CacheMode.ENABLED, CacheMode.WRITE_ONLY] + + @property + def display_url(self) -> str: + """Returns the URL in display format.""" + return self._url_display + + +def _legacy_to_cache_mode( + disable_cache: bool = False, + bypass_cache: bool = False, + no_cache_read: bool = False, + no_cache_write: bool = False +) -> CacheMode: + """ + Converts legacy cache parameters to the new CacheMode enum. + + This is an internal function to help transition from the old boolean flags + to the new CacheMode system. + """ + if disable_cache: + return CacheMode.DISABLED + if bypass_cache: + return CacheMode.BYPASS + if no_cache_read and no_cache_write: + return CacheMode.DISABLED + if no_cache_read: + return CacheMode.WRITE_ONLY + if no_cache_write: + return CacheMode.READ_ONLY + return CacheMode.ENABLED diff --git a/crawl4ai/chunking_strategy.py b/crawl4ai/chunking_strategy.py index d16e4f4..af85794 100644 --- a/crawl4ai/chunking_strategy.py +++ b/crawl4ai/chunking_strategy.py @@ -84,6 +84,12 @@ def chunk_with_topics(self, text: str) -> list: # Fixed-length word chunks class FixedLengthWordChunking(ChunkingStrategy): def __init__(self, chunk_size=100, **kwargs): + """ + Initialize the fixed-length word chunking strategy with the given chunk size. + + Args: + chunk_size (int): The size of each chunk in words. + """ self.chunk_size = chunk_size def chunk(self, text: str) -> list: @@ -93,14 +99,64 @@ def chunk(self, text: str) -> list: # Sliding window chunking class SlidingWindowChunking(ChunkingStrategy): def __init__(self, window_size=100, step=50, **kwargs): + """ + Initialize the sliding window chunking strategy with the given window size and + step size. + + Args: + window_size (int): The size of the sliding window in words. + step (int): The step size for sliding the window in words. + """ self.window_size = window_size self.step = step def chunk(self, text: str) -> list: words = text.split() chunks = [] - for i in range(0, len(words), self.step): - chunks.append(' '.join(words[i:i + self.window_size])) + + if len(words) <= self.window_size: + return [text] + + for i in range(0, len(words) - self.window_size + 1, self.step): + chunk = ' '.join(words[i:i + self.window_size]) + chunks.append(chunk) + + # Handle the last chunk if it doesn't align perfectly + if i + self.window_size < len(words): + chunks.append(' '.join(words[-self.window_size:])) + return chunks +class OverlappingWindowChunking(ChunkingStrategy): + def __init__(self, window_size=1000, overlap=100, **kwargs): + """ + Initialize the overlapping window chunking strategy with the given window size and + overlap size. 
+ + Args: + window_size (int): The size of the window in words. + overlap (int): The size of the overlap between consecutive chunks in words. + """ + self.window_size = window_size + self.overlap = overlap + + def chunk(self, text: str) -> list: + words = text.split() + chunks = [] + + if len(words) <= self.window_size: + return [text] + + start = 0 + while start < len(words): + end = start + self.window_size + chunk = ' '.join(words[start:end]) + chunks.append(chunk) + + if end >= len(words): + break + + start = end - self.overlap + + return chunks \ No newline at end of file diff --git a/crawl4ai/config.py b/crawl4ai/config.py index 00b1eb4..786ca4e 100644 --- a/crawl4ai/config.py +++ b/crawl4ai/config.py @@ -4,24 +4,23 @@ load_dotenv() # Load environment variables from .env file # Default provider, ONLY used when the extraction strategy is LLMExtractionStrategy -DEFAULT_PROVIDER = "openai/gpt-4-turbo" +DEFAULT_PROVIDER = "openai/gpt-4o-mini" MODEL_REPO_BRANCH = "new-release-0.0.2" # Provider-model dictionary, ONLY used when the extraction strategy is LLMExtractionStrategy PROVIDER_MODELS = { "ollama/llama3": "no-token-needed", # Any model from Ollama no need for API token "groq/llama3-70b-8192": os.getenv("GROQ_API_KEY"), "groq/llama3-8b-8192": os.getenv("GROQ_API_KEY"), - "openai/gpt-3.5-turbo": os.getenv("OPENAI_API_KEY"), - "openai/gpt-4-turbo": os.getenv("OPENAI_API_KEY"), + "openai/gpt-4o-mini": os.getenv("OPENAI_API_KEY"), "openai/gpt-4o": os.getenv("OPENAI_API_KEY"), "anthropic/claude-3-haiku-20240307": os.getenv("ANTHROPIC_API_KEY"), "anthropic/claude-3-opus-20240229": os.getenv("ANTHROPIC_API_KEY"), "anthropic/claude-3-sonnet-20240229": os.getenv("ANTHROPIC_API_KEY"), + "anthropic/claude-3-5-sonnet-20240620": os.getenv("ANTHROPIC_API_KEY"), } - # Chunk token threshold -CHUNK_TOKEN_THRESHOLD = 500 +CHUNK_TOKEN_THRESHOLD = 2 ** 11 # 2048 tokens OVERLAP_RATE = 0.1 WORD_TOKEN_RATE = 1.3 @@ -29,6 +28,20 @@ MIN_WORD_THRESHOLD = 1 IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD = 1 +IMPORTANT_ATTRS = ['src', 'href', 'alt', 'title', 'width', 'height'] +ONLY_TEXT_ELIGIBLE_TAGS = ['b', 'i', 'u', 'span', 'del', 'ins', 'sub', 'sup', 'strong', 'em', 'code', 'kbd', 'var', 's', 'q', 'abbr', 'cite', 'dfn', 'time', 'small', 'mark'] +SOCIAL_MEDIA_DOMAINS = [ + 'facebook.com', + 'twitter.com', + 'x.com', + 'linkedin.com', + 'instagram.com', + 'pinterest.com', + 'tiktok.com', + 'snapchat.com', + 'reddit.com', + ] + # Threshold for the Image extraction - Range is 1 to 6 # Images are scored based on point based system, to filter based on usefulness. Points are assigned # to each image based on the following aspects. 
@@ -38,3 +51,9 @@ # If image format is in jpg, png or webp # If image is in the first half of the total images extracted from the page IMAGE_SCORE_THRESHOLD = 2 + +MAX_METRICS_HISTORY = 1000 + +NEED_MIGRATION = True +URL_LOG_SHORTEN_LENGTH = 30 +SHOW_DEPRECATION_WARNINGS = True \ No newline at end of file diff --git a/crawl4ai/content_filter_strategy.py b/crawl4ai/content_filter_strategy.py new file mode 100644 index 0000000..88216f7 --- /dev/null +++ b/crawl4ai/content_filter_strategy.py @@ -0,0 +1,328 @@ +import re +from bs4 import BeautifulSoup, Tag +from typing import List, Tuple, Dict +from rank_bm25 import BM25Okapi +from time import perf_counter +from collections import deque +from bs4 import BeautifulSoup, NavigableString, Tag +from .utils import clean_tokens +from abc import ABC, abstractmethod + +from snowballstemmer import stemmer + +# from nltk.stem import PorterStemmer +# ps = PorterStemmer() +class RelevantContentFilter(ABC): + def __init__(self, user_query: str = None): + self.user_query = user_query + self.included_tags = { + # Primary structure + 'article', 'main', 'section', 'div', + # List structures + 'ul', 'ol', 'li', 'dl', 'dt', 'dd', + # Text content + 'p', 'span', 'blockquote', 'pre', 'code', + # Headers + 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', + # Tables + 'table', 'thead', 'tbody', 'tr', 'td', 'th', + # Other semantic elements + 'figure', 'figcaption', 'details', 'summary', + # Text formatting + 'em', 'strong', 'b', 'i', 'mark', 'small', + # Rich content + 'time', 'address', 'cite', 'q' + } + self.excluded_tags = { + 'nav', 'footer', 'header', 'aside', 'script', + 'style', 'form', 'iframe', 'noscript' + } + self.header_tags = {'h1', 'h2', 'h3', 'h4', 'h5', 'h6'} + self.negative_patterns = re.compile( + r'nav|footer|header|sidebar|ads|comment|promo|advert|social|share', + re.I + ) + self.min_word_count = 2 + + @abstractmethod + def filter_content(self, html: str) -> List[str]: + """Abstract method to be implemented by specific filtering strategies""" + pass + + def extract_page_query(self, soup: BeautifulSoup, body: Tag) -> str: + """Common method to extract page metadata with fallbacks""" + if self.user_query: + return self.user_query + + query_parts = [] + + # Title + if soup.title: + query_parts.append(soup.title.string) + elif soup.find('h1'): + query_parts.append(soup.find('h1').get_text()) + + # Meta tags + temp = "" + for meta_name in ['keywords', 'description']: + meta = soup.find('meta', attrs={'name': meta_name}) + if meta and meta.get('content'): + query_parts.append(meta['content']) + temp += meta['content'] + + # If still empty, grab first significant paragraph + if not temp: + # Find the first tag P thatits text contains more than 50 characters + for p in body.find_all('p'): + if len(p.get_text()) > 150: + query_parts.append(p.get_text()[:150]) + break + + return ' '.join(filter(None, query_parts)) + + + def extract_text_chunks(self, body: Tag) -> List[Tuple[str, str]]: + """ + Extracts text chunks from a BeautifulSoup body element while preserving order. + Returns list of tuples (text, tag_name) for classification. 
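        Example, an illustrative sketch of the chunks produced by the
        implementation below (the HTML snippet is a placeholder):

            body = BeautifulSoup(
                "<body><h1>Title</h1><p>Some text here.</p></body>", "lxml"
            ).body
            chunks = self.extract_text_chunks(body)
            # [(0, 'Title', 'header', <h1 Tag>),
            #  (1, 'Some text here.', 'content', <p Tag>)]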
+ + Args: + body: BeautifulSoup Tag object representing the body element + + Returns: + List of (text, tag_name) tuples + """ + # Tags to ignore - inline elements that shouldn't break text flow + INLINE_TAGS = { + 'a', 'abbr', 'acronym', 'b', 'bdo', 'big', 'br', 'button', 'cite', 'code', + 'dfn', 'em', 'i', 'img', 'input', 'kbd', 'label', 'map', 'object', 'q', + 'samp', 'script', 'select', 'small', 'span', 'strong', 'sub', 'sup', + 'textarea', 'time', 'tt', 'var' + } + + # Tags that typically contain meaningful headers + HEADER_TAGS = {'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header'} + + chunks = [] + current_text = [] + chunk_index = 0 + + def should_break_chunk(tag: Tag) -> bool: + """Determine if a tag should cause a break in the current text chunk""" + return ( + tag.name not in INLINE_TAGS + and not (tag.name == 'p' and len(current_text) == 0) + ) + + # Use deque for efficient push/pop operations + stack = deque([(body, False)]) + + while stack: + element, visited = stack.pop() + + if visited: + # End of block element - flush accumulated text + if current_text and should_break_chunk(element): + text = ' '.join(''.join(current_text).split()) + if text: + tag_type = 'header' if element.name in HEADER_TAGS else 'content' + chunks.append((chunk_index, text, tag_type, element)) + chunk_index += 1 + current_text = [] + continue + + if isinstance(element, NavigableString): + if str(element).strip(): + current_text.append(str(element).strip()) + continue + + # Pre-allocate children to avoid multiple list operations + children = list(element.children) + if not children: + continue + + # Mark block for revisit after processing children + stack.append((element, True)) + + # Add children in reverse order for correct processing + for child in reversed(children): + if isinstance(child, (Tag, NavigableString)): + stack.append((child, False)) + + # Handle any remaining text + if current_text: + text = ' '.join(''.join(current_text).split()) + if text: + chunks.append((chunk_index, text, 'content', body)) + + return chunks + + + def extract_text_chunks1(self, soup: BeautifulSoup) -> List[Tuple[int, str, Tag]]: + """Common method for extracting text chunks""" + _text_cache = {} + def fast_text(element: Tag) -> str: + elem_id = id(element) + if elem_id in _text_cache: + return _text_cache[elem_id] + texts = [] + for content in element.contents: + if isinstance(content, str): + text = content.strip() + if text: + texts.append(text) + result = ' '.join(texts) + _text_cache[elem_id] = result + return result + + candidates = [] + index = 0 + + def dfs(element): + nonlocal index + if isinstance(element, Tag): + if element.name in self.included_tags: + if not self.is_excluded(element): + text = fast_text(element) + word_count = len(text.split()) + + # Headers pass through with adjusted minimum + if element.name in self.header_tags: + if word_count >= 3: # Minimal sanity check for headers + candidates.append((index, text, element)) + index += 1 + # Regular content uses standard minimum + elif word_count >= self.min_word_count: + candidates.append((index, text, element)) + index += 1 + + for child in element.children: + dfs(child) + + dfs(soup.body if soup.body else soup) + return candidates + + def is_excluded(self, tag: Tag) -> bool: + """Common method for exclusion logic""" + if tag.name in self.excluded_tags: + return True + class_id = ' '.join(filter(None, [ + ' '.join(tag.get('class', [])), + tag.get('id', '') + ])) + return bool(self.negative_patterns.search(class_id)) + + def clean_element(self, tag: Tag) 
-> str: + """Common method for cleaning HTML elements with minimal overhead""" + if not tag or not isinstance(tag, Tag): + return "" + + unwanted_tags = {'script', 'style', 'aside', 'form', 'iframe', 'noscript'} + unwanted_attrs = {'style', 'onclick', 'onmouseover', 'align', 'bgcolor', 'class', 'id'} + + # Use string builder pattern for better performance + builder = [] + + def render_tag(elem): + if not isinstance(elem, Tag): + if isinstance(elem, str): + builder.append(elem.strip()) + return + + if elem.name in unwanted_tags: + return + + # Start tag + builder.append(f'<{elem.name}') + + # Add cleaned attributes + attrs = {k: v for k, v in elem.attrs.items() if k not in unwanted_attrs} + for key, value in attrs.items(): + builder.append(f' {key}="{value}"') + + builder.append('>') + + # Process children + for child in elem.children: + render_tag(child) + + # Close tag + builder.append(f'') + + try: + render_tag(tag) + return ''.join(builder) + except Exception: + return str(tag) # Fallback to original if anything fails + +class BM25ContentFilter(RelevantContentFilter): + def __init__(self, user_query: str = None, bm25_threshold: float = 1.0, language: str = 'english'): + super().__init__(user_query=user_query) + self.bm25_threshold = bm25_threshold + self.priority_tags = { + 'h1': 5.0, + 'h2': 4.0, + 'h3': 3.0, + 'title': 4.0, + 'strong': 2.0, + 'b': 1.5, + 'em': 1.5, + 'blockquote': 2.0, + 'code': 2.0, + 'pre': 1.5, + 'th': 1.5, # Table headers + } + self.stemmer = stemmer(language) + + def filter_content(self, html: str) -> List[str]: + """Implements content filtering using BM25 algorithm with priority tag handling""" + if not html or not isinstance(html, str): + return [] + + soup = BeautifulSoup(html, 'lxml') + body = soup.find('body') + query = self.extract_page_query(soup.find('head'), body) + candidates = self.extract_text_chunks(body) + + if not candidates: + return [] + + # Tokenize corpus + # tokenized_corpus = [chunk.lower().split() for _, chunk, _, _ in candidates] + # tokenized_query = query.lower().split() + + # tokenized_corpus = [[ps.stem(word) for word in chunk.lower().split()] + # for _, chunk, _, _ in candidates] + # tokenized_query = [ps.stem(word) for word in query.lower().split()] + + tokenized_corpus = [[self.stemmer.stemWord(word) for word in chunk.lower().split()] + for _, chunk, _, _ in candidates] + tokenized_query = [self.stemmer.stemWord(word) for word in query.lower().split()] + + # Clean from stop words and noise + tokenized_corpus = [clean_tokens(tokens) for tokens in tokenized_corpus] + tokenized_query = clean_tokens(tokenized_query) + + bm25 = BM25Okapi(tokenized_corpus) + scores = bm25.get_scores(tokenized_query) + + # Adjust scores with tag weights + adjusted_candidates = [] + for score, (index, chunk, tag_type, tag) in zip(scores, candidates): + tag_weight = self.priority_tags.get(tag.name, 1.0) + adjusted_score = score * tag_weight + adjusted_candidates.append((adjusted_score, index, chunk, tag)) + + # Filter candidates by threshold + selected_candidates = [ + (index, chunk, tag) for adjusted_score, index, chunk, tag in adjusted_candidates + if adjusted_score >= self.bm25_threshold + ] + + if not selected_candidates: + return [] + + # Sort selected candidates by original document order + selected_candidates.sort(key=lambda x: x[0]) + + return [self.clean_element(tag) for _, _, tag in selected_candidates] diff --git a/crawl4ai/content_scraping_strategy.py b/crawl4ai/content_scraping_strategy.py new file mode 100644 index 0000000..d4b901d --- /dev/null 
+++ b/crawl4ai/content_scraping_strategy.py @@ -0,0 +1,687 @@ +import re # Point 1: Pre-Compile Regular Expressions +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional +from bs4 import BeautifulSoup +from concurrent.futures import ThreadPoolExecutor +import asyncio, requests, re, os +from .config import * +from bs4 import element, NavigableString, Comment +from urllib.parse import urljoin +from requests.exceptions import InvalidSchema +# from .content_cleaning_strategy import ContentCleaningStrategy +from .content_filter_strategy import RelevantContentFilter, BM25ContentFilter +from .markdown_generation_strategy import MarkdownGenerationStrategy, DefaultMarkdownGenerationStrategy +from .models import MarkdownGenerationResult +from .utils import ( + sanitize_input_encode, + sanitize_html, + extract_metadata, + InvalidCSSSelectorError, + CustomHTML2Text, + normalize_url, + is_external_url +) +from .tools import profile_and_time + +# Pre-compile regular expressions for Open Graph and Twitter metadata +OG_REGEX = re.compile(r'^og:') +TWITTER_REGEX = re.compile(r'^twitter:') +DIMENSION_REGEX = re.compile(r"(\d+)(\D*)") + +# Function to parse image height/width value and units +def parse_dimension(dimension): + if dimension: + # match = re.match(r"(\d+)(\D*)", dimension) + match = DIMENSION_REGEX.match(dimension) + if match: + number = int(match.group(1)) + unit = match.group(2) or 'px' # Default unit is 'px' if not specified + return number, unit + return None, None + +# Fetch image file metadata to extract size and extension +def fetch_image_file_size(img, base_url): + #If src is relative path construct full URL, if not it may be CDN URL + img_url = urljoin(base_url,img.get('src')) + try: + response = requests.head(img_url) + if response.status_code == 200: + return response.headers.get('Content-Length',None) + else: + print(f"Failed to retrieve file size for {img_url}") + return None + except InvalidSchema as e: + return None + finally: + return + +class ContentScrapingStrategy(ABC): + @abstractmethod + def scrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: + pass + + @abstractmethod + async def ascrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: + pass + +class WebScrapingStrategy(ContentScrapingStrategy): + def __init__(self, logger=None): + self.logger = logger + + def _log(self, level, message, tag="SCRAPE", **kwargs): + """Helper method to safely use logger.""" + if self.logger: + log_method = getattr(self.logger, level) + log_method(message=message, tag=tag, **kwargs) + + def scrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: + return self._get_content_of_website_optimized(url, html, is_async=False, **kwargs) + + async def ascrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: + return await asyncio.to_thread(self._get_content_of_website_optimized, url, html, **kwargs) + + + def _generate_markdown_content(self, + cleaned_html: str, + html: str, + url: str, + success: bool, + **kwargs) -> Dict[str, Any]: + """Generate markdown content using either new strategy or legacy method. 
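        Example, an illustrative sketch of a call and the keys of the returned
        dict (argument values are placeholders):

            out = self._generate_markdown_content(
                cleaned_html=cleaned_html,
                html=html,
                url="https://example.com",
                success=True,
                fit_markdown=True,
            )
            # out contains "markdown", "fit_markdown", "fit_html" and
            # "markdown_v2" entries for the caller to attach to CrawlResult.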
+ + Args: + cleaned_html: Sanitized HTML content + html: Original HTML content + url: Base URL of the page + success: Whether scraping was successful + **kwargs: Additional options including: + - markdown_generator: Optional[MarkdownGenerationStrategy] + - html2text: Dict[str, Any] options for HTML2Text + - content_filter: Optional[RelevantContentFilter] + - fit_markdown: bool + - fit_markdown_user_query: Optional[str] + - fit_markdown_bm25_threshold: float + + Returns: + Dict containing markdown content in various formats + """ + markdown_generator: Optional[MarkdownGenerationStrategy] = kwargs.get('markdown_generator', DefaultMarkdownGenerationStrategy()) + + if markdown_generator: + try: + markdown_result: MarkdownGenerationResult = markdown_generator.generate_markdown( + cleaned_html=cleaned_html, + base_url=url, + html2text_options=kwargs.get('html2text', {}), + content_filter=kwargs.get('content_filter', None) + ) + + return { + 'markdown': markdown_result.raw_markdown, + 'fit_markdown': markdown_result.fit_markdown or "Set flag 'fit_markdown' to True to get cleaned HTML content.", + 'fit_html': markdown_result.fit_html or "Set flag 'fit_markdown' to True to get cleaned HTML content.", + 'markdown_v2': markdown_result + } + except Exception as e: + self._log('error', + message="Error using new markdown generation strategy: {error}", + tag="SCRAPE", + params={"error": str(e)} + ) + markdown_generator = None + + # Legacy method + h = CustomHTML2Text() + h.update_params(**kwargs.get('html2text', {})) + markdown = h.handle(cleaned_html) + markdown = markdown.replace(' ```', '```') + + fit_markdown = "Set flag 'fit_markdown' to True to get cleaned HTML content." + fit_html = "Set flag 'fit_markdown' to True to get cleaned HTML content." + + if kwargs.get('content_filter', None) or kwargs.get('fit_markdown', False): + content_filter = kwargs.get('content_filter', None) + if not content_filter: + content_filter = BM25ContentFilter( + user_query=kwargs.get('fit_markdown_user_query', None), + bm25_threshold=kwargs.get('fit_markdown_bm25_threshold', 1.0) + ) + fit_html = content_filter.filter_content(html) + fit_html = '\n'.join('
<div>{}</div>
'.format(s) for s in fit_html) + fit_markdown = h.handle(fit_html) + + markdown_v2 = MarkdownGenerationResult( + raw_markdown=markdown, + markdown_with_citations=markdown, + references_markdown=markdown, + fit_markdown=fit_markdown + ) + + return { + 'markdown': markdown, + 'fit_markdown': fit_markdown, + 'fit_html': fit_html, + 'markdown_v2' : markdown_v2 + } + + + def _get_content_of_website_optimized(self, url: str, html: str, word_count_threshold: int = MIN_WORD_THRESHOLD, css_selector: str = None, **kwargs) -> Dict[str, Any]: + success = True + if not html: + return None + + # soup = BeautifulSoup(html, 'html.parser') + soup = BeautifulSoup(html, 'lxml') + body = soup.body + + try: + meta = extract_metadata("", soup) + except Exception as e: + self._log('error', + message="Error extracting metadata: {error}", + tag="SCRAPE", + params={"error": str(e)} + ) + # print('Error extracting metadata:', str(e)) + meta = {} + + + image_description_min_word_threshold = kwargs.get('image_description_min_word_threshold', IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD) + + for tag in kwargs.get('excluded_tags', []) or []: + for el in body.select(tag): + el.decompose() + + if css_selector: + selected_elements = body.select(css_selector) + if not selected_elements: + return { + 'markdown': '', + 'cleaned_html': '', + 'success': True, + 'media': {'images': [], 'videos': [], 'audios': []}, + 'links': {'internal': [], 'external': []}, + 'metadata': {}, + 'message': f"No elements found for CSS selector: {css_selector}" + } + # raise InvalidCSSSelectorError(f"Invalid CSS selector, No elements found for CSS selector: {css_selector}") + body = soup.new_tag('div') + for el in selected_elements: + body.append(el) + + links = {'internal': [], 'external': []} + media = {'images': [], 'videos': [], 'audios': []} + internal_links_dict = {} + external_links_dict = {} + + # Extract meaningful text for media files from closest parent + def find_closest_parent_with_useful_text(tag): + current_tag = tag + while current_tag: + current_tag = current_tag.parent + # Get the text content of the parent tag + if current_tag: + text_content = current_tag.get_text(separator=' ',strip=True) + # Check if the text content has at least word_count_threshold + if len(text_content.split()) >= image_description_min_word_threshold: + return text_content + return None + + def process_image_old(img, url, index, total_images): + def parse_srcset(srcset_str): + """Parse srcset attribute into list of image URLs with their sizes.""" + if not srcset_str: + return [] + + sources = [] + # Split on http/https and filter empty strings + urls = [f"http{part}" for part in srcset_str.split("http") if part] + + for url in urls: + # Remove trailing comma and whitespace, then split to get width + url = url.strip().rstrip(',') + parts = url.rsplit(' ', 1) + img_url = parts[0].strip() + width = parts[1].rstrip('w') if len(parts) > 1 else None + sources.append({'url': img_url, 'width': width}) + + return sources + + #Check if an image has valid display and inside undesired html elements + def is_valid_image(img, parent, parent_classes): + style = img.get('style', '') + src = img.get('src', '') + classes_to_check = ['button', 'icon', 'logo'] + tags_to_check = ['button', 'input'] + return all([ + 'display:none' not in style, + src, + not any(s in var for var in [src, img.get('alt', ''), *parent_classes] for s in classes_to_check), + parent.name not in tags_to_check + ]) + + #Score an image for it's usefulness + def score_image_for_usefulness(img, base_url, index, 
images_count): + image_height = img.get('height') + height_value, height_unit = parse_dimension(image_height) + image_width = img.get('width') + width_value, width_unit = parse_dimension(image_width) + image_size = 0 #int(fetch_image_file_size(img,base_url) or 0) + image_src = img.get('src','') + if "data:image/" in image_src: + image_format = image_src.split(',')[0].split(';')[0].split('/')[1] + else: + image_format = os.path.splitext(img.get('src',''))[1].lower() + # Remove . from format + image_format = image_format.strip('.').split('?')[0] + score = 0 + if height_value: + if height_unit == 'px' and height_value > 150: + score += 1 + if height_unit in ['%','vh','vmin','vmax'] and height_value >30: + score += 1 + if width_value: + if width_unit == 'px' and width_value > 150: + score += 1 + if width_unit in ['%','vh','vmin','vmax'] and width_value >30: + score += 1 + if image_size > 10000: + score += 1 + if img.get('alt') != '': + score+=1 + if any(image_format==format for format in ['jpg','png','webp']): + score+=1 + if index/images_count<0.5: + score+=1 + return score + + if not is_valid_image(img, img.parent, img.parent.get('class', [])): + return None + + score = score_image_for_usefulness(img, url, index, total_images) + if score <= kwargs.get('image_score_threshold', IMAGE_SCORE_THRESHOLD): + return None + + base_result = { + 'src': img.get('src', ''), + 'data-src': img.get('data-src', ''), + 'alt': img.get('alt', ''), + 'desc': find_closest_parent_with_useful_text(img), + 'score': score, + 'type': 'image' + } + + sources = [] + srcset = img.get('srcset', '') + if srcset: + sources = parse_srcset(srcset) + if sources: + return [dict(base_result, src=source['url'], width=source['width']) + for source in sources] + + return [base_result] # Always return a list + + def process_image(img, url, index, total_images): + parse_srcset = lambda s: [{'url': u.strip().split()[0], 'width': u.strip().split()[-1].rstrip('w') + if ' ' in u else None} + for u in [f"http{p}" for p in s.split("http") if p]] + + # Constants for checks + classes_to_check = frozenset(['button', 'icon', 'logo']) + tags_to_check = frozenset(['button', 'input']) + + # Pre-fetch commonly used attributes + style = img.get('style', '') + alt = img.get('alt', '') + src = img.get('src', '') + data_src = img.get('data-src', '') + width = img.get('width') + height = img.get('height') + parent = img.parent + parent_classes = parent.get('class', []) + + # Quick validation checks + if ('display:none' in style or + parent.name in tags_to_check or + any(c in cls for c in parent_classes for cls in classes_to_check) or + any(c in src for c in classes_to_check) or + any(c in alt for c in classes_to_check)): + return None + + # Quick score calculation + score = 0 + if width and width.isdigit(): + width_val = int(width) + score += 1 if width_val > 150 else 0 + if height and height.isdigit(): + height_val = int(height) + score += 1 if height_val > 150 else 0 + if alt: + score += 1 + score += index/total_images < 0.5 + + image_format = '' + if "data:image/" in src: + image_format = src.split(',')[0].split(';')[0].split('/')[1].split(';')[0] + else: + image_format = os.path.splitext(src)[1].lower().strip('.').split('?')[0] + + if image_format in ('jpg', 'png', 'webp', 'avif'): + score += 1 + + if score <= kwargs.get('image_score_threshold', IMAGE_SCORE_THRESHOLD): + return None + + # Use set for deduplication + unique_urls = set() + image_variants = [] + + # Base image info template + base_info = { + 'alt': alt, + 'desc': 
find_closest_parent_with_useful_text(img), + 'score': score, + 'type': 'image' + } + + # Inline function for adding variants + def add_variant(src, width=None): + if src and not src.startswith('data:') and src not in unique_urls: + unique_urls.add(src) + image_variants.append({**base_info, 'src': src, 'width': width}) + + # Process all sources + add_variant(src) + add_variant(data_src) + + # Handle srcset and data-srcset in one pass + for attr in ('srcset', 'data-srcset'): + if value := img.get(attr): + for source in parse_srcset(value): + add_variant(source['url'], source['width']) + + # Quick picture element check + if picture := img.find_parent('picture'): + for source in picture.find_all('source'): + if srcset := source.get('srcset'): + for src in parse_srcset(srcset): + add_variant(src['url'], src['width']) + + # Framework-specific attributes in one pass + for attr, value in img.attrs.items(): + if attr.startswith('data-') and ('src' in attr or 'srcset' in attr) and 'http' in value: + add_variant(value) + + return image_variants if image_variants else None + + def remove_unwanted_attributes(element, important_attrs, keep_data_attributes=False): + attrs_to_remove = [] + for attr in element.attrs: + if attr not in important_attrs: + if keep_data_attributes: + if not attr.startswith('data-'): + attrs_to_remove.append(attr) + else: + attrs_to_remove.append(attr) + + for attr in attrs_to_remove: + del element[attr] + + def process_element(element: element.PageElement) -> bool: + try: + if isinstance(element, NavigableString): + if isinstance(element, Comment): + element.extract() + return False + + # if element.name == 'img': + # process_image(element, url, 0, 1) + # return True + + if element.name in ['script', 'style', 'link', 'meta', 'noscript']: + element.decompose() + return False + + keep_element = False + + exclude_social_media_domains = SOCIAL_MEDIA_DOMAINS + kwargs.get('exclude_social_media_domains', []) + exclude_social_media_domains = list(set(exclude_social_media_domains)) + + try: + if element.name == 'a' and element.get('href'): + href = element.get('href', '').strip() + if not href: # Skip empty hrefs + return False + + url_base = url.split('/')[2] + + # Normalize the URL + try: + normalized_href = normalize_url(href, url) + except ValueError as e: + # logging.warning(f"Invalid URL format: {href}, Error: {str(e)}") + return False + + link_data = { + 'href': normalized_href, + 'text': element.get_text().strip(), + 'title': element.get('title', '').strip() + } + + # Check for duplicates and add to appropriate dictionary + is_external = is_external_url(normalized_href, url_base) + if is_external: + if normalized_href not in external_links_dict: + external_links_dict[normalized_href] = link_data + else: + if normalized_href not in internal_links_dict: + internal_links_dict[normalized_href] = link_data + + keep_element = True + + # Handle external link exclusions + if is_external: + if kwargs.get('exclude_external_links', False): + element.decompose() + return False + elif kwargs.get('exclude_social_media_links', False): + if any(domain in normalized_href.lower() for domain in exclude_social_media_domains): + element.decompose() + return False + elif kwargs.get('exclude_domains', []): + if any(domain in normalized_href.lower() for domain in kwargs.get('exclude_domains', [])): + element.decompose() + return False + + except Exception as e: + raise Exception(f"Error processing links: {str(e)}") + + try: + if element.name == 'img': + potential_sources = ['src', 'data-src', 'srcset' 
'data-lazy-src', 'data-original'] + src = element.get('src', '') + while not src and potential_sources: + src = element.get(potential_sources.pop(0), '') + if not src: + element.decompose() + return False + + # If it is srcset pick up the first image + if 'srcset' in element.attrs: + src = element.attrs['srcset'].split(',')[0].split(' ')[0] + + # Check flag if we should remove external images + if kwargs.get('exclude_external_images', False): + src_url_base = src.split('/')[2] + url_base = url.split('/')[2] + if url_base not in src_url_base: + element.decompose() + return False + + if not kwargs.get('exclude_external_images', False) and kwargs.get('exclude_social_media_links', False): + src_url_base = src.split('/')[2] + url_base = url.split('/')[2] + if any(domain in src for domain in exclude_social_media_domains): + element.decompose() + return False + + # Handle exclude domains + if kwargs.get('exclude_domains', []): + if any(domain in src for domain in kwargs.get('exclude_domains', [])): + element.decompose() + return False + + return True # Always keep image elements + except Exception as e: + raise "Error processing images" + + + # Check if flag to remove all forms is set + if kwargs.get('remove_forms', False) and element.name == 'form': + element.decompose() + return False + + if element.name in ['video', 'audio']: + media[f"{element.name}s"].append({ + 'src': element.get('src'), + 'alt': element.get('alt'), + 'type': element.name, + 'description': find_closest_parent_with_useful_text(element) + }) + source_tags = element.find_all('source') + for source_tag in source_tags: + media[f"{element.name}s"].append({ + 'src': source_tag.get('src'), + 'alt': element.get('alt'), + 'type': element.name, + 'description': find_closest_parent_with_useful_text(element) + }) + return True # Always keep video and audio elements + + if element.name in ONLY_TEXT_ELIGIBLE_TAGS: + if kwargs.get('only_text', False): + element.replace_with(element.get_text()) + + try: + remove_unwanted_attributes(element, IMPORTANT_ATTRS, kwargs.get('keep_data_attributes', False)) + except Exception as e: + # print('Error removing unwanted attributes:', str(e)) + self._log('error', + message="Error removing unwanted attributes: {error}", + tag="SCRAPE", + params={"error": str(e)} + ) + # Process children + for child in list(element.children): + if isinstance(child, NavigableString) and not isinstance(child, Comment): + if len(child.strip()) > 0: + keep_element = True + else: + if process_element(child): + keep_element = True + + + # Check word count + if not keep_element: + word_count = len(element.get_text(strip=True).split()) + keep_element = word_count >= word_count_threshold + + if not keep_element: + element.decompose() + + return keep_element + except Exception as e: + # print('Error processing element:', str(e)) + self._log('error', + message="Error processing element: {error}", + tag="SCRAPE", + params={"error": str(e)} + ) + return False + + process_element(body) + + # Update the links dictionary with unique links + links['internal'] = list(internal_links_dict.values()) + links['external'] = list(external_links_dict.values()) + + # # Process images using ThreadPoolExecutor + imgs = body.find_all('img') + + # For test we use for loop instead of thread + media['images'] = [ + img for result in (process_image(img, url, i, len(imgs)) + for i, img in enumerate(imgs)) + if result is not None + for img in result + ] + + def flatten_nested_elements(node): + if isinstance(node, NavigableString): + return node + if 
len(node.contents) == 1 and isinstance(node.contents[0], element.Tag) and node.contents[0].name == node.name: + return flatten_nested_elements(node.contents[0]) + node.contents = [flatten_nested_elements(child) for child in node.contents] + return node + + body = flatten_nested_elements(body) + base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)') + for img in imgs: + src = img.get('src', '') + if base64_pattern.match(src): + # Replace base64 data with empty string + img['src'] = base64_pattern.sub('', src) + + str_body = "" + try: + str_body = body.encode_contents().decode('utf-8') + except Exception as e: + # Reset body to the original HTML + success = False + body = BeautifulSoup(html, 'html.parser') + + # Create a new div with a special ID + error_div = body.new_tag('div', id='crawl4ai_error_message') + error_div.string = ''' + Crawl4AI Error: This page is not fully supported. + + Possible reasons: + 1. The page may have restrictions that prevent crawling. + 2. The page might not be fully loaded. + + Suggestions: + - Try calling the crawl function with these parameters: + magic=True, + - Set headless=False to visualize what's happening on the page. + + If the issue persists, please check the page's structure and any potential anti-crawling measures. + ''' + + # Append the error div to the body + body.body.append(error_div) + str_body = body.encode_contents().decode('utf-8') + + print(f"[LOG] 😧 Error: After processing the crawled HTML and removing irrelevant tags, nothing was left in the page. Check the markdown for further details.") + self._log('error', + message="After processing the crawled HTML and removing irrelevant tags, nothing was left in the page. Check the markdown for further details.", + tag="SCRAPE" + ) + + cleaned_html = str_body.replace('\n\n', '\n').replace(' ', ' ') + + markdown_content = self._generate_markdown_content( + cleaned_html=cleaned_html, + html=html, + url=url, + success=success, + **kwargs + ) + + return { + **markdown_content, + 'cleaned_html': cleaned_html, + 'success': success, + 'media': media, + 'links': links, + 'metadata': meta + } diff --git a/crawl4ai/content_scrapping_strategy.py b/crawl4ai/content_scrapping_strategy.py deleted file mode 100644 index 68f0341..0000000 --- a/crawl4ai/content_scrapping_strategy.py +++ /dev/null @@ -1,301 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Dict, Any -from bs4 import BeautifulSoup -from concurrent.futures import ThreadPoolExecutor -import asyncio, requests, re, os -from .config import * -from bs4 import element, NavigableString, Comment -from urllib.parse import urljoin -from requests.exceptions import InvalidSchema - -from .utils import ( - sanitize_input_encode, - sanitize_html, - extract_metadata, - InvalidCSSSelectorError, - CustomHTML2Text -) - -class ContentScrappingStrategy(ABC): - @abstractmethod - def scrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: - pass - - @abstractmethod - async def ascrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: - pass - -class WebScrappingStrategy(ContentScrappingStrategy): - def scrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: - return self._get_content_of_website_optimized(url, html, is_async=False, **kwargs) - - async def ascrap(self, url: str, html: str, **kwargs) -> Dict[str, Any]: - return await asyncio.to_thread(self._get_content_of_website_optimized, url, html, **kwargs) - - def _get_content_of_website_optimized(self, url: str, html: str, word_count_threshold: int = MIN_WORD_THRESHOLD, 
css_selector: str = None, **kwargs) -> Dict[str, Any]: - if not html: - return None - - soup = BeautifulSoup(html, 'html.parser') - body = soup.body - - image_description_min_word_threshold = kwargs.get('image_description_min_word_threshold', IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD) - - for tag in kwargs.get('excluded_tags', []) or []: - for el in body.select(tag): - el.decompose() - - if css_selector: - selected_elements = body.select(css_selector) - if not selected_elements: - return { - 'markdown': '', - 'cleaned_html': '', - 'success': True, - 'media': {'images': [], 'videos': [], 'audios': []}, - 'links': {'internal': [], 'external': []}, - 'metadata': {}, - 'message': f"No elements found for CSS selector: {css_selector}" - } - # raise InvalidCSSSelectorError(f"Invalid CSS selector, No elements found for CSS selector: {css_selector}") - body = soup.new_tag('div') - for el in selected_elements: - body.append(el) - - links = {'internal': [], 'external': []} - media = {'images': [], 'videos': [], 'audios': []} - - # Extract meaningful text for media files from closest parent - def find_closest_parent_with_useful_text(tag): - current_tag = tag - while current_tag: - current_tag = current_tag.parent - # Get the text content of the parent tag - if current_tag: - text_content = current_tag.get_text(separator=' ',strip=True) - # Check if the text content has at least word_count_threshold - if len(text_content.split()) >= image_description_min_word_threshold: - return text_content - return None - - def process_image(img, url, index, total_images): - #Check if an image has valid display and inside undesired html elements - def is_valid_image(img, parent, parent_classes): - style = img.get('style', '') - src = img.get('src', '') - classes_to_check = ['button', 'icon', 'logo'] - tags_to_check = ['button', 'input'] - return all([ - 'display:none' not in style, - src, - not any(s in var for var in [src, img.get('alt', ''), *parent_classes] for s in classes_to_check), - parent.name not in tags_to_check - ]) - - #Score an image for it's usefulness - def score_image_for_usefulness(img, base_url, index, images_count): - # Function to parse image height/width value and units - def parse_dimension(dimension): - if dimension: - match = re.match(r"(\d+)(\D*)", dimension) - if match: - number = int(match.group(1)) - unit = match.group(2) or 'px' # Default unit is 'px' if not specified - return number, unit - return None, None - - # Fetch image file metadata to extract size and extension - def fetch_image_file_size(img, base_url): - #If src is relative path construct full URL, if not it may be CDN URL - img_url = urljoin(base_url,img.get('src')) - try: - response = requests.head(img_url) - if response.status_code == 200: - return response.headers.get('Content-Length',None) - else: - print(f"Failed to retrieve file size for {img_url}") - return None - except InvalidSchema as e: - return None - finally: - return - - image_height = img.get('height') - height_value, height_unit = parse_dimension(image_height) - image_width = img.get('width') - width_value, width_unit = parse_dimension(image_width) - image_size = 0 #int(fetch_image_file_size(img,base_url) or 0) - image_format = os.path.splitext(img.get('src',''))[1].lower() - # Remove . 
from format - image_format = image_format.strip('.').split('?')[0] - score = 0 - if height_value: - if height_unit == 'px' and height_value > 150: - score += 1 - if height_unit in ['%','vh','vmin','vmax'] and height_value >30: - score += 1 - if width_value: - if width_unit == 'px' and width_value > 150: - score += 1 - if width_unit in ['%','vh','vmin','vmax'] and width_value >30: - score += 1 - if image_size > 10000: - score += 1 - if img.get('alt') != '': - score+=1 - if any(image_format==format for format in ['jpg','png','webp']): - score+=1 - if index/images_count<0.5: - score+=1 - return score - - if not is_valid_image(img, img.parent, img.parent.get('class', [])): - return None - score = score_image_for_usefulness(img, url, index, total_images) - if score <= IMAGE_SCORE_THRESHOLD: - return None - return { - 'src': img.get('src', ''), - 'data-src': img.get('data-src', ''), - 'alt': img.get('alt', ''), - 'desc': find_closest_parent_with_useful_text(img), - 'score': score, - 'type': 'image' - } - - def process_element(element: element.PageElement) -> bool: - try: - if isinstance(element, NavigableString): - if isinstance(element, Comment): - element.extract() - return False - - # if element.name == 'img': - # process_image(element, url, 0, 1) - # return True - - if element.name in ['script', 'style', 'link', 'meta', 'noscript']: - element.decompose() - return False - - keep_element = False - - if element.name == 'a' and element.get('href'): - href = element['href'] - url_base = url.split('/')[2] - link_data = {'href': href, 'text': element.get_text()} - if href.startswith('http') and url_base not in href: - links['external'].append(link_data) - else: - links['internal'].append(link_data) - keep_element = True - - elif element.name == 'img': - return True # Always keep image elements - - elif element.name in ['video', 'audio']: - media[f"{element.name}s"].append({ - 'src': element.get('src'), - 'alt': element.get('alt'), - 'type': element.name, - 'description': find_closest_parent_with_useful_text(element) - }) - source_tags = element.find_all('source') - for source_tag in source_tags: - media[f"{element.name}s"].append({ - 'src': source_tag.get('src'), - 'alt': element.get('alt'), - 'type': element.name, - 'description': find_closest_parent_with_useful_text(element) - }) - return True # Always keep video and audio elements - - if element.name != 'pre': - if element.name in ['b', 'i', 'u', 'span', 'del', 'ins', 'sub', 'sup', 'strong', 'em', 'code', 'kbd', 'var', 's', 'q', 'abbr', 'cite', 'dfn', 'time', 'small', 'mark']: - if kwargs.get('only_text', False): - element.replace_with(element.get_text()) - else: - element.unwrap() - elif element.name != 'img': - element.attrs = {} - - # Process children - for child in list(element.children): - if isinstance(child, NavigableString) and not isinstance(child, Comment): - if len(child.strip()) > 0: - keep_element = True - else: - if process_element(child): - keep_element = True - - - # Check word count - if not keep_element: - word_count = len(element.get_text(strip=True).split()) - keep_element = word_count >= word_count_threshold - - if not keep_element: - element.decompose() - - return keep_element - except Exception as e: - print('Error processing element:', str(e)) - return False - - #process images by filtering and extracting contextual text from the page - # imgs = body.find_all('img') - # media['images'] = [ - # result for result in - # (process_image(img, url, i, len(imgs)) for i, img in enumerate(imgs)) - # if result is not None - # ] - - 
process_element(body) - - # # Process images using ThreadPoolExecutor - imgs = body.find_all('img') - with ThreadPoolExecutor() as executor: - image_results = list(executor.map(process_image, imgs, [url]*len(imgs), range(len(imgs)), [len(imgs)]*len(imgs))) - media['images'] = [result for result in image_results if result is not None] - - def flatten_nested_elements(node): - if isinstance(node, NavigableString): - return node - if len(node.contents) == 1 and isinstance(node.contents[0], element.Tag) and node.contents[0].name == node.name: - return flatten_nested_elements(node.contents[0]) - node.contents = [flatten_nested_elements(child) for child in node.contents] - return node - - body = flatten_nested_elements(body) - base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)') - for img in imgs: - src = img.get('src', '') - if base64_pattern.match(src): - # Replace base64 data with empty string - img['src'] = base64_pattern.sub('', src) - cleaned_html = str(body).replace('\n\n', '\n').replace(' ', ' ') - - h = CustomHTML2Text() - h.ignore_links = True - h.body_width = 0 - try: - markdown = h.handle(cleaned_html) - except Exception as e: - markdown = h.handle(sanitize_html(cleaned_html)) - markdown = markdown.replace(' ```', '```') - - try: - meta = extract_metadata(html, soup) - except Exception as e: - print('Error extracting metadata:', str(e)) - meta = {} - - cleaned_html = sanitize_html(cleaned_html) - return { - 'markdown': markdown, - 'cleaned_html': cleaned_html, - 'success': True, - 'media': media, - 'links': links, - 'metadata': meta - } diff --git a/crawl4ai/crawler_strategy.py b/crawl4ai/crawler_strategy.py index 5d6864b..898dcfa 100644 --- a/crawl4ai/crawler_strategy.py +++ b/crawl4ai/crawler_strategy.py @@ -132,7 +132,7 @@ def __init__(self, use_cached_html=False, js_code=None, **kwargs): # chromedriver_autoinstaller.install() # import chromedriver_autoinstaller - # crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") + # crawl4ai_folder = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai") # driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()), options=self.options) # chromedriver_path = chromedriver_autoinstaller.install() # chromedriver_path = chromedriver_autoinstaller.utils.download_chromedriver() @@ -205,7 +205,7 @@ def crawl(self, url: str, **kwargs) -> str: url_hash = hashlib.md5(url.encode()).hexdigest() if self.use_cached_html: - cache_file_path = os.path.join(Path.home(), ".crawl4ai", "cache", url_hash) + cache_file_path = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai", "cache", url_hash) if os.path.exists(cache_file_path): with open(cache_file_path, "r") as f: return sanitize_input_encode(f.read()) @@ -275,7 +275,7 @@ def crawl(self, url: str, **kwargs) -> str: self.driver = self.execute_hook('before_return_html', self.driver, html) # Store in cache - cache_file_path = os.path.join(Path.home(), ".crawl4ai", "cache", url_hash) + cache_file_path = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai", "cache", url_hash) with open(cache_file_path, "w", encoding="utf-8") as f: f.write(html) @@ -283,7 +283,7 @@ def crawl(self, url: str, **kwargs) -> str: print(f"[LOG] ✅ Crawled {url} successfully!") return html - except InvalidArgumentException: + except InvalidArgumentException as e: if not hasattr(e, 'msg'): e.msg = sanitize_input_encode(str(e)) raise InvalidArgumentException(f"Failed to crawl {url}: {e.msg}") diff --git a/crawl4ai/database.py 
b/crawl4ai/database.py index 37d9446..42ad701 100644 --- a/crawl4ai/database.py +++ b/crawl4ai/database.py @@ -3,7 +3,7 @@ import sqlite3 from typing import Optional, Tuple -DB_PATH = os.path.join(Path.home(), ".crawl4ai") +DB_PATH = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai") os.makedirs(DB_PATH, exist_ok=True) DB_PATH = os.path.join(DB_PATH, "crawl4ai.db") diff --git a/crawl4ai/extraction_strategy.py b/crawl4ai/extraction_strategy.py index 210a360..b79e0c4 100644 --- a/crawl4ai/extraction_strategy.py +++ b/crawl4ai/extraction_strategy.py @@ -68,7 +68,7 @@ def __init__(self, """ super().__init__() self.provider = provider - self.api_token = api_token or PROVIDER_MODELS.get(provider, None) or os.getenv("OPENAI_API_KEY") + self.api_token = api_token or PROVIDER_MODELS.get(provider, "no-token") or os.getenv("OPENAI_API_KEY") self.instruction = instruction self.extract_type = extraction_type self.schema = schema @@ -80,6 +80,7 @@ def __init__(self, self.word_token_rate = kwargs.get("word_token_rate", WORD_TOKEN_RATE) self.apply_chunking = kwargs.get("apply_chunking", True) self.base_url = kwargs.get("base_url", None) + self.api_base = kwargs.get("api_base", kwargs.get("base_url", None)) self.extra_args = kwargs.get("extra_args", {}) if not self.apply_chunking: self.chunk_token_threshold = 1e9 @@ -116,7 +117,7 @@ def extract(self, url: str, ix:int, html: str) -> List[Dict[str, Any]]: self.provider, prompt_with_variables, self.api_token, - base_url=self.base_url, + base_url=self.api_base or self.base_url, extra_args = self.extra_args ) # , json_response=self.extract_type == "schema") try: @@ -234,11 +235,12 @@ def __init__(self, semantic_filter = None, word_count_threshold=10, max_dist=0.2 """ Initialize the strategy with clustering parameters. - :param semantic_filter: A keyword filter for document filtering. - :param word_count_threshold: Minimum number of words per cluster. - :param max_dist: The maximum cophenetic distance on the dendrogram to form clusters. - :param linkage_method: The linkage method for hierarchical clustering. - :param top_k: Number of top categories to extract. + Args: + semantic_filter (str): A keyword filter for document filtering. + word_count_threshold (int): Minimum number of words per cluster. + max_dist (float): The maximum cophenetic distance on the dendrogram to form clusters. + linkage_method (str): The linkage method for hierarchical clustering. + top_k (int): Number of top categories to extract. 
""" super().__init__() @@ -257,8 +259,8 @@ def __init__(self, semantic_filter = None, word_count_threshold=10, max_dist=0.2 self.get_embedding_method = "direct" self.device = get_device() - import torch - self.device = torch.device('cpu') + # import torch + # self.device = torch.device('cpu') self.default_batch_size = calculate_batch_size(self.device) @@ -271,7 +273,7 @@ def __init__(self, semantic_filter = None, word_count_threshold=10, max_dist=0.2 # self.get_embedding_method = "direct" # else: - self.tokenizer, self.model = load_bge_small_en_v1_5() + self.tokenizer, self.model = load_HF_embedding_model(model_name) self.model.to(self.device) self.model.eval() @@ -738,7 +740,6 @@ def run(self, url: str, sections: List[str], *q, **kwargs) -> List[Dict[str, Any combined_html = self.DEL.join(sections) return self.extract(url, combined_html, **kwargs) - class JsonXPATHExtractionStrategy(ExtractionStrategy): def __init__(self, schema: Dict[str, Any], **kwargs): super().__init__(**kwargs) diff --git a/crawl4ai/html2text/__init__.py b/crawl4ai/html2text/__init__.py new file mode 100644 index 0000000..c1effe6 --- /dev/null +++ b/crawl4ai/html2text/__init__.py @@ -0,0 +1,1015 @@ +"""html2text: Turn HTML into equivalent Markdown-structured text.""" + +import html.entities +import html.parser +import re +import string +import urllib.parse as urlparse +from textwrap import wrap +from typing import Dict, List, Optional, Tuple, Union + +from . import config +from ._typing import OutCallback +from .elements import AnchorElement, ListElement +from .utils import ( + dumb_css_parser, + element_style, + escape_md, + escape_md_section, + google_fixed_width_font, + google_has_height, + google_list_style, + google_text_emphasis, + hn, + list_numbering_start, + pad_tables_in_text, + skipwrap, + unifiable_n, +) + +__version__ = (2024, 2, 26) + + +# TODO: +# Support decoded entities with UNIFIABLE. + + +class HTML2Text(html.parser.HTMLParser): + def __init__( + self, + out: Optional[OutCallback] = None, + baseurl: str = "", + bodywidth: int = config.BODY_WIDTH, + ) -> None: + """ + Input parameters: + out: possible custom replacement for self.outtextf (which + appends lines of text). 
+ baseurl: base URL of the document we process + """ + super().__init__(convert_charrefs=False) + + # Config options + self.split_next_td = False + self.td_count = 0 + self.table_start = False + self.unicode_snob = config.UNICODE_SNOB # covered in cli + + self.escape_snob = config.ESCAPE_SNOB # covered in cli + self.escape_backslash = config.ESCAPE_BACKSLASH # covered in cli + self.escape_dot = config.ESCAPE_DOT # covered in cli + self.escape_plus = config.ESCAPE_PLUS # covered in cli + self.escape_dash = config.ESCAPE_DASH # covered in cli + + self.links_each_paragraph = config.LINKS_EACH_PARAGRAPH + self.body_width = bodywidth # covered in cli + self.skip_internal_links = config.SKIP_INTERNAL_LINKS # covered in cli + self.inline_links = config.INLINE_LINKS # covered in cli + self.protect_links = config.PROTECT_LINKS # covered in cli + self.google_list_indent = config.GOOGLE_LIST_INDENT # covered in cli + self.ignore_links = config.IGNORE_ANCHORS # covered in cli + self.ignore_mailto_links = config.IGNORE_MAILTO_LINKS # covered in cli + self.ignore_images = config.IGNORE_IMAGES # covered in cli + self.images_as_html = config.IMAGES_AS_HTML # covered in cli + self.images_to_alt = config.IMAGES_TO_ALT # covered in cli + self.images_with_size = config.IMAGES_WITH_SIZE # covered in cli + self.ignore_emphasis = config.IGNORE_EMPHASIS # covered in cli + self.bypass_tables = config.BYPASS_TABLES # covered in cli + self.ignore_tables = config.IGNORE_TABLES # covered in cli + self.google_doc = False # covered in cli + self.ul_item_mark = "*" # covered in cli + self.emphasis_mark = "_" # covered in cli + self.strong_mark = "**" + self.single_line_break = config.SINGLE_LINE_BREAK # covered in cli + self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli + self.hide_strikethrough = False # covered in cli + self.mark_code = config.MARK_CODE + self.wrap_list_items = config.WRAP_LIST_ITEMS # covered in cli + self.wrap_links = config.WRAP_LINKS # covered in cli + self.wrap_tables = config.WRAP_TABLES + self.pad_tables = config.PAD_TABLES # covered in cli + self.default_image_alt = config.DEFAULT_IMAGE_ALT # covered in cli + self.tag_callback = None + self.open_quote = config.OPEN_QUOTE # covered in cli + self.close_quote = config.CLOSE_QUOTE # covered in cli + self.include_sup_sub = config.INCLUDE_SUP_SUB # covered in cli + + if out is None: + self.out = self.outtextf + else: + self.out = out + + # empty list to store output characters before they are "joined" + self.outtextlist: List[str] = [] + + self.quiet = 0 + self.p_p = 0 # number of newline character to print before next output + self.outcount = 0 + self.start = True + self.space = False + self.a: List[AnchorElement] = [] + self.astack: List[Optional[Dict[str, Optional[str]]]] = [] + self.maybe_automatic_link: Optional[str] = None + self.empty_link = False + self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://") + self.acount = 0 + self.list: List[ListElement] = [] + self.blockquote = 0 + self.pre = False + self.startpre = False + self.code = False + self.quote = False + self.br_toggle = "" + self.lastWasNL = False + self.lastWasList = False + self.style = 0 + self.style_def: Dict[str, Dict[str, str]] = {} + self.tag_stack: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]] = [] + self.emphasis = 0 + self.drop_white_space = 0 + self.inheader = False + # Current abbreviation definition + self.abbr_title: Optional[str] = None + # Last inner HTML (for abbr being defined) + self.abbr_data: Optional[str] = None + # Stack of 
abbreviations to write later + self.abbr_list: Dict[str, str] = {} + self.baseurl = baseurl + self.stressed = False + self.preceding_stressed = False + self.preceding_data = "" + self.current_tag = "" + + config.UNIFIABLE["nbsp"] = " _place_holder;" + + def update_params(self, **kwargs): + for key, value in kwargs.items(): + setattr(self, key, value) + + def feed(self, data: str) -> None: + data = data.replace("", "") + super().feed(data) + + def handle(self, data: str) -> str: + self.start = True + self.feed(data) + self.feed("") + markdown = self.optwrap(self.finish()) + if self.pad_tables: + return pad_tables_in_text(markdown) + else: + return markdown + + def outtextf(self, s: str) -> None: + self.outtextlist.append(s) + if s: + self.lastWasNL = s[-1] == "\n" + + def finish(self) -> str: + self.close() + + self.pbr() + self.o("", force="end") + + outtext = "".join(self.outtextlist) + + if self.unicode_snob: + nbsp = html.entities.html5["nbsp;"] + else: + nbsp = " " + outtext = outtext.replace(" _place_holder;", nbsp) + + # Clear self.outtextlist to avoid memory leak of its content to + # the next handling. + self.outtextlist = [] + + return outtext + + def handle_charref(self, c: str) -> None: + self.handle_data(self.charref(c), True) + + def handle_entityref(self, c: str) -> None: + ref = self.entityref(c) + + # ref may be an empty string (e.g. for ‎/‏ markers that should + # not contribute to the final output). + # self.handle_data cannot handle a zero-length string right after a + # stressed tag or mid-text within a stressed tag (text get split and + # self.stressed/self.preceding_stressed gets switched after the first + # part of that text). + if ref: + self.handle_data(ref, True) + + def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None: + self.handle_tag(tag, dict(attrs), start=True) + + def handle_endtag(self, tag: str) -> None: + self.handle_tag(tag, {}, start=False) + + def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]: + """ + :type attrs: dict + + :returns: The index of certain set of attributes (of a link) in the + self.a list. 
If the set of attributes is not found, returns None + :rtype: int + """ + if "href" not in attrs: + return None + + match = False + for i, a in enumerate(self.a): + if "href" in a.attrs and a.attrs["href"] == attrs["href"]: + if "title" in a.attrs or "title" in attrs: + if ( + "title" in a.attrs + and "title" in attrs + and a.attrs["title"] == attrs["title"] + ): + match = True + else: + match = True + + if match: + return i + return None + + def handle_emphasis( + self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str] + ) -> None: + """ + Handles various text emphases + """ + tag_emphasis = google_text_emphasis(tag_style) + parent_emphasis = google_text_emphasis(parent_style) + + # handle Google's text emphasis + strikethrough = "line-through" in tag_emphasis and self.hide_strikethrough + + # google and others may mark a font's weight as `bold` or `700` + bold = False + for bold_marker in config.BOLD_TEXT_STYLE_VALUES: + bold = bold_marker in tag_emphasis and bold_marker not in parent_emphasis + if bold: + break + + italic = "italic" in tag_emphasis and "italic" not in parent_emphasis + fixed = ( + google_fixed_width_font(tag_style) + and not google_fixed_width_font(parent_style) + and not self.pre + ) + + if start: + # crossed-out text must be handled before other attributes + # in order not to output qualifiers unnecessarily + if bold or italic or fixed: + self.emphasis += 1 + if strikethrough: + self.quiet += 1 + if italic: + self.o(self.emphasis_mark) + self.drop_white_space += 1 + if bold: + self.o(self.strong_mark) + self.drop_white_space += 1 + if fixed: + self.o("`") + self.drop_white_space += 1 + self.code = True + else: + if bold or italic or fixed: + # there must not be whitespace before closing emphasis mark + self.emphasis -= 1 + self.space = False + if fixed: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o("`") + self.code = False + if bold: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o(self.strong_mark) + if italic: + if self.drop_white_space: + # empty emphasis, drop it + self.drop_white_space -= 1 + else: + self.o(self.emphasis_mark) + # space is only allowed after *all* emphasis marks + if (bold or italic) and not self.emphasis: + self.o(" ") + if strikethrough: + self.quiet -= 1 + + def handle_tag( + self, tag: str, attrs: Dict[str, Optional[str]], start: bool + ) -> None: + self.current_tag = tag + + if self.tag_callback is not None: + if self.tag_callback(self, tag, attrs, start) is True: + return + + # first thing inside the anchor tag is another tag + # that produces some output + if ( + start + and self.maybe_automatic_link is not None + and tag not in ["p", "div", "style", "dl", "dt"] + and (tag != "img" or self.ignore_images) + ): + self.o("[") + self.maybe_automatic_link = None + self.empty_link = False + + if self.google_doc: + # the attrs parameter is empty for a closing tag. in addition, we + # need the attributes of the parent nodes in order to get a + # complete style description for the current element. we assume + # that google docs export well formed html. 
+ parent_style: Dict[str, str] = {} + if start: + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + tag_style = element_style(attrs, self.style_def, parent_style) + self.tag_stack.append((tag, attrs, tag_style)) + else: + dummy, attrs, tag_style = ( + self.tag_stack.pop() if self.tag_stack else (None, {}, {}) + ) + if self.tag_stack: + parent_style = self.tag_stack[-1][2] + + if hn(tag): + # check if nh is inside of an 'a' tag (incorrect but found in the wild) + if self.astack: + if start: + self.inheader = True + # are inside link name, so only add '#' if it can appear before '[' + if self.outtextlist and self.outtextlist[-1] == "[": + self.outtextlist.pop() + self.space = False + self.o(hn(tag) * "#" + " ") + self.o("[") + else: + self.p_p = 0 # don't break up link name + self.inheader = False + return # prevent redundant emphasis marks on headers + else: + self.p() + if start: + self.inheader = True + self.o(hn(tag) * "#" + " ") + else: + self.inheader = False + return # prevent redundant emphasis marks on headers + + if tag in ["p", "div"]: + if self.google_doc: + if start and google_has_height(tag_style): + self.p() + else: + self.soft_br() + elif self.astack: + pass + elif self.split_next_td: + pass + else: + self.p() + + if tag == "br" and start: + if self.blockquote > 0: + self.o(" \n> ") + else: + self.o(" \n") + + if tag == "hr" and start: + self.p() + self.o("* * *") + self.p() + + if tag in ["head", "style", "script"]: + if start: + self.quiet += 1 + else: + self.quiet -= 1 + + if tag == "style": + if start: + self.style += 1 + else: + self.style -= 1 + + if tag in ["body"]: + self.quiet = 0 # sites like 9rules.com never close + + if tag == "blockquote": + if start: + self.p() + self.o("> ", force=True) + self.start = True + self.blockquote += 1 + else: + self.blockquote -= 1 + self.p() + + if tag in ["em", "i", "u"] and not self.ignore_emphasis: + # Separate with a space if we immediately follow an alphanumeric + # character, since otherwise Markdown won't render the emphasis + # marks, and we'll be left with eg 'foo_bar_' visible. + # (Don't add a space otherwise, though, since there isn't one in the + # original HTML.) + if ( + start + and self.preceding_data + and self.preceding_data[-1] not in string.whitespace + and self.preceding_data[-1] not in string.punctuation + ): + emphasis = " " + self.emphasis_mark + self.preceding_data += " " + else: + emphasis = self.emphasis_mark + + self.o(emphasis) + if start: + self.stressed = True + + if tag in ["strong", "b"] and not self.ignore_emphasis: + # Separate with space if we immediately follow an * character, since + # without it, Markdown won't render the resulting *** correctly. + # (Don't add a space otherwise, though, since there isn't one in the + # original HTML.) + if ( + start + and self.preceding_data + # When `self.strong_mark` is set to empty, the next condition + # will cause IndexError since it's trying to match the data + # with the first character of the `self.strong_mark`. 
+ and len(self.strong_mark) > 0 + and self.preceding_data[-1] == self.strong_mark[0] + ): + strong = " " + self.strong_mark + self.preceding_data += " " + else: + strong = self.strong_mark + + self.o(strong) + if start: + self.stressed = True + + if tag in ["del", "strike", "s"]: + if start and self.preceding_data and self.preceding_data[-1] == "~": + strike = " ~~" + self.preceding_data += " " + else: + strike = "~~" + + self.o(strike) + if start: + self.stressed = True + + if self.google_doc: + if not self.inheader: + # handle some font attributes, but leave headers clean + self.handle_emphasis(start, tag_style, parent_style) + + if tag in ["kbd", "code", "tt"] and not self.pre: + self.o("`") # TODO: `` `this` `` + self.code = not self.code + + if tag == "abbr": + if start: + self.abbr_title = None + self.abbr_data = "" + if "title" in attrs: + self.abbr_title = attrs["title"] + else: + if self.abbr_title is not None: + assert self.abbr_data is not None + self.abbr_list[self.abbr_data] = self.abbr_title + self.abbr_title = None + self.abbr_data = None + + if tag == "q": + if not self.quote: + self.o(self.open_quote) + else: + self.o(self.close_quote) + self.quote = not self.quote + + def link_url(self: HTML2Text, link: str, title: str = "") -> None: + url = urlparse.urljoin(self.baseurl, link) + title = ' "{}"'.format(title) if title.strip() else "" + self.o("]({url}{title})".format(url=escape_md(url), title=title)) + + if tag == "a" and not self.ignore_links: + if start: + if ( + "href" in attrs + and attrs["href"] is not None + and not (self.skip_internal_links and attrs["href"].startswith("#")) + and not ( + self.ignore_mailto_links and attrs["href"].startswith("mailto:") + ) + ): + self.astack.append(attrs) + self.maybe_automatic_link = attrs["href"] + self.empty_link = True + if self.protect_links: + attrs["href"] = "<" + attrs["href"] + ">" + else: + self.astack.append(None) + else: + if self.astack: + a = self.astack.pop() + if self.maybe_automatic_link and not self.empty_link: + self.maybe_automatic_link = None + elif a: + assert a["href"] is not None + if self.empty_link: + self.o("[") + self.empty_link = False + self.maybe_automatic_link = None + if self.inline_links: + self.p_p = 0 + title = a.get("title") or "" + title = escape_md(title) + link_url(self, a["href"], title) + else: + i = self.previousIndex(a) + if i is not None: + a_props = self.a[i] + else: + self.acount += 1 + a_props = AnchorElement(a, self.acount, self.outcount) + self.a.append(a_props) + self.o("][" + str(a_props.count) + "]") + + if tag == "img" and start and not self.ignore_images: + if "src" in attrs and attrs["src"] is not None: + if not self.images_to_alt: + attrs["href"] = attrs["src"] + alt = attrs.get("alt") or self.default_image_alt + + # If we have images_with_size, write raw html including width, + # height, and alt attributes + if self.images_as_html or ( + self.images_with_size and ("width" in attrs or "height" in attrs) + ): + self.o("") + return + + # If we have a link to create, output the start + if self.maybe_automatic_link is not None: + href = self.maybe_automatic_link + if ( + self.images_to_alt + and escape_md(alt) == href + and self.absolute_url_matcher.match(href) + ): + self.o("<" + escape_md(alt) + ">") + self.empty_link = False + return + else: + self.o("[") + self.maybe_automatic_link = None + self.empty_link = False + + # If we have images_to_alt, we discard the image itself, + # considering only the alt text. 
+ if self.images_to_alt: + self.o(escape_md(alt)) + else: + self.o("![" + escape_md(alt) + "]") + if self.inline_links: + href = attrs.get("href") or "" + self.o( + "(" + escape_md(urlparse.urljoin(self.baseurl, href)) + ")" + ) + else: + i = self.previousIndex(attrs) + if i is not None: + a_props = self.a[i] + else: + self.acount += 1 + a_props = AnchorElement(attrs, self.acount, self.outcount) + self.a.append(a_props) + self.o("[" + str(a_props.count) + "]") + + if tag == "dl" and start: + self.p() + if tag == "dt" and not start: + self.pbr() + if tag == "dd" and start: + self.o(" ") + if tag == "dd" and not start: + self.pbr() + + if tag in ["ol", "ul"]: + # Google Docs create sub lists as top level lists + if not self.list and not self.lastWasList: + self.p() + if start: + if self.google_doc: + list_style = google_list_style(tag_style) + else: + list_style = tag + numbering_start = list_numbering_start(attrs) + self.list.append(ListElement(list_style, numbering_start)) + else: + if self.list: + self.list.pop() + if not self.google_doc and not self.list: + self.o("\n") + self.lastWasList = True + else: + self.lastWasList = False + + if tag == "li": + self.pbr() + if start: + if self.list: + li = self.list[-1] + else: + li = ListElement("ul", 0) + if self.google_doc: + self.o(" " * self.google_nest_count(tag_style)) + else: + # Indent two spaces per list, except use three spaces for an + # unordered list inside an ordered list. + # https://spec.commonmark.org/0.28/#motivation + # TODO: line up
  1. s > 9 correctly. + parent_list = None + for list in self.list: + self.o( + " " if parent_list == "ol" and list.name == "ul" else " " + ) + parent_list = list.name + + if li.name == "ul": + self.o(self.ul_item_mark + " ") + elif li.name == "ol": + li.num += 1 + self.o(str(li.num) + ". ") + self.start = True + + if tag in ["table", "tr", "td", "th"]: + if self.ignore_tables: + if tag == "tr": + if start: + pass + else: + self.soft_br() + else: + pass + + elif self.bypass_tables: + if start: + self.soft_br() + if tag in ["td", "th"]: + if start: + self.o("<{}>\n\n".format(tag)) + else: + self.o("\n".format(tag)) + else: + if start: + self.o("<{}>".format(tag)) + else: + self.o("".format(tag)) + + else: + if tag == "table": + if start: + self.table_start = True + if self.pad_tables: + self.o("<" + config.TABLE_MARKER_FOR_PAD + ">") + self.o(" \n") + else: + if self.pad_tables: + # add break in case the table is empty or its 1 row table + self.soft_br() + self.o("") + self.o(" \n") + if tag in ["td", "th"] and start: + if self.split_next_td: + self.o("| ") + self.split_next_td = True + + if tag == "tr" and start: + self.td_count = 0 + if tag == "tr" and not start: + self.split_next_td = False + self.soft_br() + if tag == "tr" and not start and self.table_start: + # Underline table header + self.o("|".join(["---"] * self.td_count)) + self.soft_br() + self.table_start = False + if tag in ["td", "th"] and start: + self.td_count += 1 + + if tag == "pre": + if start: + self.startpre = True + self.pre = True + else: + self.pre = False + if self.mark_code: + self.out("\n[/code]") + self.p() + + if tag in ["sup", "sub"] and self.include_sup_sub: + if start: + self.o("<{}>".format(tag)) + else: + self.o("".format(tag)) + + # TODO: Add docstring for these one letter functions + def pbr(self) -> None: + "Pretty print has a line break" + if self.p_p == 0: + self.p_p = 1 + + def p(self) -> None: + "Set pretty print to 1 or 2 lines" + self.p_p = 1 if self.single_line_break else 2 + + def soft_br(self) -> None: + "Soft breaks" + self.pbr() + self.br_toggle = " " + + def o( + self, data: str, puredata: bool = False, force: Union[bool, str] = False + ) -> None: + """ + Deal with indentation and whitespace + """ + if self.abbr_data is not None: + self.abbr_data += data + + if not self.quiet: + if self.google_doc: + # prevent white space immediately after 'begin emphasis' + # marks ('**' and '_') + lstripped_data = data.lstrip() + if self.drop_white_space and not (self.pre or self.code): + data = lstripped_data + if lstripped_data != "": + self.drop_white_space = 0 + + if puredata and not self.pre: + # This is a very dangerous call ... it could mess up + # all handling of   when not handled properly + # (see entityref) + data = re.sub(r"\s+", r" ", data) + if data and data[0] == " ": + self.space = True + data = data[1:] + if not data and not force: + return + + if self.startpre: + # self.out(" :") #TODO: not output when already one there + if not data.startswith("\n") and not data.startswith("\r\n"): + #
     +                    # <pre> stuff...
    +                    data = "\n" + data
    +                if self.mark_code:
    +                    self.out("\n[code]")
    +                    self.p_p = 0
    +
    +            bq = ">" * self.blockquote
    +            if not (force and data and data[0] == ">") and self.blockquote:
    +                bq += " "
    +
    +            if self.pre:
    +                if not self.list:
    +                    bq += "    "
    +                # else: list content is already partially indented
    +                bq += "    " * len(self.list)
    +                data = data.replace("\n", "\n" + bq)
    +
    +            if self.startpre:
    +                self.startpre = False
    +                if self.list:
    +                    # use existing initial indentation
    +                    data = data.lstrip("\n")
    +
    +            if self.start:
    +                self.space = False
    +                self.p_p = 0
    +                self.start = False
    +
    +            if force == "end":
    +                # It's the end.
    +                self.p_p = 0
    +                self.out("\n")
    +                self.space = False
    +
    +            if self.p_p:
    +                self.out((self.br_toggle + "\n" + bq) * self.p_p)
    +                self.space = False
    +                self.br_toggle = ""
    +
    +            if self.space:
    +                if not self.lastWasNL:
    +                    self.out(" ")
    +                self.space = False
    +
    +            if self.a and (
    +                (self.p_p == 2 and self.links_each_paragraph) or force == "end"
    +            ):
    +                if force == "end":
    +                    self.out("\n")
    +
    +                newa = []
    +                for link in self.a:
    +                    if self.outcount > link.outcount:
    +                        self.out(
    +                            "   ["
    +                            + str(link.count)
    +                            + "]: "
    +                            + urlparse.urljoin(self.baseurl, link.attrs["href"])
    +                        )
    +                        if "title" in link.attrs and link.attrs["title"] is not None:
    +                            self.out(" (" + link.attrs["title"] + ")")
    +                        self.out("\n")
    +                    else:
    +                        newa.append(link)
    +
    +                # Don't need an extra line when nothing was done.
    +                if self.a != newa:
    +                    self.out("\n")
    +
    +                self.a = newa
    +
    +            if self.abbr_list and force == "end":
    +                for abbr, definition in self.abbr_list.items():
    +                    self.out("  *[" + abbr + "]: " + definition + "\n")
    +
    +            self.p_p = 0
    +            self.out(data)
    +            self.outcount += 1
    +
    +    def handle_data(self, data: str, entity_char: bool = False) -> None:
    +        if not data:
    +            # Data may be empty for some HTML entities. For example,
    +            # LEFT-TO-RIGHT MARK.
    +            return
    +
    +        if self.stressed:
    +            data = data.strip()
    +            self.stressed = False
    +            self.preceding_stressed = True
    +        elif self.preceding_stressed:
    +            if (
    +                re.match(r"[^][(){}\s.!?]", data[0])
    +                and not hn(self.current_tag)
    +                and self.current_tag not in ["a", "code", "pre"]
    +            ):
    +                # should match a letter or common punctuation
    +                data = " " + data
    +            self.preceding_stressed = False
    +
    +        if self.style:
    +            self.style_def.update(dumb_css_parser(data))
    +
    +        if self.maybe_automatic_link is not None:
    +            href = self.maybe_automatic_link
    +            if (
    +                href == data
    +                and self.absolute_url_matcher.match(href)
    +                and self.use_automatic_links
    +            ):
    +                self.o("<" + data + ">")
    +                self.empty_link = False
    +                return
    +            else:
    +                self.o("[")
    +                self.maybe_automatic_link = None
    +                self.empty_link = False
    +
    +        if not self.code and not self.pre and not entity_char:
    +            data = escape_md_section(data, snob=self.escape_snob, escape_dot=self.escape_dot, escape_plus=self.escape_plus, escape_dash=self.escape_dash)
    +        self.preceding_data = data
    +        self.o(data, puredata=True)
    +
    +    def charref(self, name: str) -> str:
    +        if name[0] in ["x", "X"]:
    +            c = int(name[1:], 16)
    +        else:
    +            c = int(name)
    +
    +        if not self.unicode_snob and c in unifiable_n:
    +            return unifiable_n[c]
    +        else:
    +            try:
    +                return chr(c)
    +            except ValueError:  # invalid unicode
    +                return ""
    +
    +    def entityref(self, c: str) -> str:
    +        if not self.unicode_snob and c in config.UNIFIABLE:
    +            return config.UNIFIABLE[c]
    +        try:
    +            ch = html.entities.html5[c + ";"]
    +        except KeyError:
    +            return "&" + c + ";"
    +        return config.UNIFIABLE[c] if c == "nbsp" else ch
    +
    +    def google_nest_count(self, style: Dict[str, str]) -> int:
    +        """
    +        Calculate the nesting count of google doc lists
    +
    +        :type style: dict
    +
    +        :rtype: int
    +        """
    +        nest_count = 0
    +        if "margin-left" in style:
    +            nest_count = int(style["margin-left"][:-2]) // self.google_list_indent
    +
    +        return nest_count
    +
    +    def optwrap(self, text: str) -> str:
    +        """
    +        Wrap all paragraphs in the provided text.
    +
    +        :type text: str
    +
    +        :rtype: str
    +        """
    +        if not self.body_width:
    +            return text
    +
    +        result = ""
    +        newlines = 0
    +        # I cannot think of a better solution for now.
    +        # To avoid the non-wrap behaviour for entire paras
    +        # because of the presence of a link in it
    +        if not self.wrap_links:
    +            self.inline_links = False
    +        for para in text.split("\n"):
    +            if len(para) > 0:
    +                if not skipwrap(
    +                    para, self.wrap_links, self.wrap_list_items, self.wrap_tables
    +                ):
    +                    indent = ""
    +                    if para.startswith("  " + self.ul_item_mark):
    +                        # list item continuation: add a double indent to the
    +                        # new lines
    +                        indent = "    "
    +                    elif para.startswith("> "):
    +                        # blockquote continuation: add the greater than symbol
    +                        # to the new lines
    +                        indent = "> "
    +                    wrapped = wrap(
    +                        para,
    +                        self.body_width,
    +                        break_long_words=False,
    +                        subsequent_indent=indent,
    +                    )
    +                    result += "\n".join(wrapped)
    +                    if para.endswith("  "):
    +                        result += "  \n"
    +                        newlines = 1
    +                    elif indent:
    +                        result += "\n"
    +                        newlines = 1
    +                    else:
    +                        result += "\n\n"
    +                        newlines = 2
    +                else:
    +                    # Warning for the tempted!!!
    +                    # Be aware that obvious replacement of this with
    +                    # line.isspace()
    +                    # DOES NOT work! Explanations are welcome.
    +                    if not config.RE_SPACE.match(para):
    +                        result += para + "\n"
    +                        newlines = 1
    +            else:
    +                if newlines < 2:
    +                    result += "\n"
    +                    newlines += 1
    +        return result
    +
    +
    +def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
    +    if bodywidth is None:
    +        bodywidth = config.BODY_WIDTH
    +    h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
    +
    +    return h.handle(html)
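
The module closes with the `html2text()` convenience wrapper around `HTML2Text.handle()`. A minimal usage sketch (the import path is assumed from the file paths in this diff, and the sample HTML is illustrative):

```python
# Sketch only: import path follows the new crawl4ai/html2text package layout.
from crawl4ai.html2text import HTML2Text, html2text

sample = '<h1>Title</h1><p>See the <a href="https://example.com">docs</a>.</p>'

# One-shot helper; falls back to config.BODY_WIDTH when bodywidth is not given.
print(html2text(sample, baseurl="https://example.com", bodywidth=0))

# Equivalent via the class, which the CLI further down also drives.
h = HTML2Text(baseurl="https://example.com", bodywidth=0)
print(h.handle(sample))
```
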
    diff --git a/crawl4ai/html2text/__main__.py b/crawl4ai/html2text/__main__.py
    new file mode 100644
    index 0000000..4e28416
    --- /dev/null
    +++ b/crawl4ai/html2text/__main__.py
    @@ -0,0 +1,3 @@
    +from .cli import main
    +
    +main()
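
`__main__.py` simply delegates to `cli.main()`, so the vendored converter is runnable as a module. A hedged sketch of driving that entry point from Python (the input filename is a placeholder):

```python
# Roughly equivalent to: python -m crawl4ai.html2text page.html
# "page.html" is a placeholder; cli.main() writes the Markdown to stdout.
import subprocess
import sys

completed = subprocess.run(
    [sys.executable, "-m", "crawl4ai.html2text", "page.html"],
    capture_output=True,
    text=True,
    check=True,
)
print(completed.stdout)
```
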
    diff --git a/crawl4ai/html2text/_typing.py b/crawl4ai/html2text/_typing.py
    new file mode 100644
    index 0000000..eed8325
    --- /dev/null
    +++ b/crawl4ai/html2text/_typing.py
    @@ -0,0 +1,2 @@
    +class OutCallback:
    +    def __call__(self, s: str) -> None: ...
    diff --git a/crawl4ai/html2text/cli.py b/crawl4ai/html2text/cli.py
    new file mode 100644
    index 0000000..0153227
    --- /dev/null
    +++ b/crawl4ai/html2text/cli.py
    @@ -0,0 +1,330 @@
    +import argparse
    +import sys
    +
    +from . import HTML2Text, __version__, config
    +
    +
    +def main() -> None:
    +    baseurl = ""
    +
    +    class bcolors:
    +        HEADER = "\033[95m"
    +        OKBLUE = "\033[94m"
    +        OKGREEN = "\033[92m"
    +        WARNING = "\033[93m"
    +        FAIL = "\033[91m"
    +        ENDC = "\033[0m"
    +        BOLD = "\033[1m"
    +        UNDERLINE = "\033[4m"
    +
    +    p = argparse.ArgumentParser()
    +    p.add_argument(
    +        "--default-image-alt",
    +        dest="default_image_alt",
    +        default=config.DEFAULT_IMAGE_ALT,
    +        help="The default alt string for images with missing ones",
    +    )
    +    p.add_argument(
    +        "--pad-tables",
    +        dest="pad_tables",
    +        action="store_true",
    +        default=config.PAD_TABLES,
    +        help="pad the cells to equal column width in tables",
    +    )
    +    p.add_argument(
    +        "--no-wrap-links",
    +        dest="wrap_links",
    +        action="store_false",
    +        default=config.WRAP_LINKS,
    +        help="don't wrap links during conversion",
    +    )
    +    p.add_argument(
    +        "--wrap-list-items",
    +        dest="wrap_list_items",
    +        action="store_true",
    +        default=config.WRAP_LIST_ITEMS,
    +        help="wrap list items during conversion",
    +    )
    +    p.add_argument(
    +        "--wrap-tables",
    +        dest="wrap_tables",
    +        action="store_true",
    +        default=config.WRAP_TABLES,
    +        help="wrap tables",
    +    )
    +    p.add_argument(
    +        "--ignore-emphasis",
    +        dest="ignore_emphasis",
    +        action="store_true",
    +        default=config.IGNORE_EMPHASIS,
    +        help="don't include any formatting for emphasis",
    +    )
    +    p.add_argument(
    +        "--reference-links",
    +        dest="inline_links",
    +        action="store_false",
    +        default=config.INLINE_LINKS,
    +        help="use reference style links instead of inline links",
    +    )
    +    p.add_argument(
    +        "--ignore-links",
    +        dest="ignore_links",
    +        action="store_true",
    +        default=config.IGNORE_ANCHORS,
    +        help="don't include any formatting for links",
    +    )
    +    p.add_argument(
    +        "--ignore-mailto-links",
    +        action="store_true",
    +        dest="ignore_mailto_links",
    +        default=config.IGNORE_MAILTO_LINKS,
    +        help="don't include mailto: links",
    +    )
    +    p.add_argument(
    +        "--protect-links",
    +        dest="protect_links",
    +        action="store_true",
    +        default=config.PROTECT_LINKS,
    +        help="protect links from line breaks surrounding them with angle brackets",
    +    )
    +    p.add_argument(
    +        "--ignore-images",
    +        dest="ignore_images",
    +        action="store_true",
    +        default=config.IGNORE_IMAGES,
    +        help="don't include any formatting for images",
    +    )
    +    p.add_argument(
    +        "--images-as-html",
    +        dest="images_as_html",
    +        action="store_true",
    +        default=config.IMAGES_AS_HTML,
    +        help=(
    +            "Always write image tags as raw html; preserves `height`, `width` and "
    +            "`alt` if possible."
    +        ),
    +    )
    +    p.add_argument(
    +        "--images-to-alt",
    +        dest="images_to_alt",
    +        action="store_true",
    +        default=config.IMAGES_TO_ALT,
    +        help="Discard image data, only keep alt text",
    +    )
    +    p.add_argument(
    +        "--images-with-size",
    +        dest="images_with_size",
    +        action="store_true",
    +        default=config.IMAGES_WITH_SIZE,
    +        help=(
    +            "Write image tags with height and width attrs as raw html to retain "
    +            "dimensions"
    +        ),
    +    )
    +    p.add_argument(
    +        "-g",
    +        "--google-doc",
    +        action="store_true",
    +        dest="google_doc",
    +        default=False,
    +        help="convert an html-exported Google Document",
    +    )
    +    p.add_argument(
    +        "-d",
    +        "--dash-unordered-list",
    +        action="store_true",
    +        dest="ul_style_dash",
    +        default=False,
    +        help="use a dash rather than a star for unordered list items",
    +    )
    +    p.add_argument(
    +        "-e",
    +        "--asterisk-emphasis",
    +        action="store_true",
    +        dest="em_style_asterisk",
    +        default=False,
    +        help="use an asterisk rather than an underscore for emphasized text",
    +    )
    +    p.add_argument(
    +        "-b",
    +        "--body-width",
    +        dest="body_width",
    +        type=int,
    +        default=config.BODY_WIDTH,
    +        help="number of characters per output line, 0 for no wrap",
    +    )
    +    p.add_argument(
    +        "-i",
    +        "--google-list-indent",
    +        dest="list_indent",
    +        type=int,
    +        default=config.GOOGLE_LIST_INDENT,
    +        help="number of pixels Google indents nested lists",
    +    )
    +    p.add_argument(
    +        "-s",
    +        "--hide-strikethrough",
    +        action="store_true",
    +        dest="hide_strikethrough",
    +        default=False,
    +        help="hide strike-through text. only relevant when -g is " "specified as well",
    +    )
    +    p.add_argument(
    +        "--escape-all",
    +        action="store_true",
    +        dest="escape_snob",
    +        default=False,
    +        help=(
    +            "Escape all special characters.  Output is less readable, but avoids "
    +            "corner case formatting issues."
    +        ),
    +    )
    +    p.add_argument(
    +        "--bypass-tables",
    +        action="store_true",
    +        dest="bypass_tables",
    +        default=config.BYPASS_TABLES,
    +        help="Format tables in HTML rather than Markdown syntax.",
    +    )
    +    p.add_argument(
    +        "--ignore-tables",
    +        action="store_true",
    +        dest="ignore_tables",
    +        default=config.IGNORE_TABLES,
    +        help="Ignore table-related tags (table, th, td, tr) " "while keeping rows.",
    +    )
    +    p.add_argument(
    +        "--single-line-break",
    +        action="store_true",
    +        dest="single_line_break",
    +        default=config.SINGLE_LINE_BREAK,
    +        help=(
    +            "Use a single line break after a block element rather than two line "
    +            "breaks. NOTE: Requires --body-width=0"
    +        ),
    +    )
    +    p.add_argument(
    +        "--unicode-snob",
    +        action="store_true",
    +        dest="unicode_snob",
    +        default=config.UNICODE_SNOB,
    +        help="Use unicode throughout document",
    +    )
    +    p.add_argument(
    +        "--no-automatic-links",
    +        action="store_false",
    +        dest="use_automatic_links",
    +        default=config.USE_AUTOMATIC_LINKS,
    +        help="Do not use automatic links wherever applicable",
    +    )
    +    p.add_argument(
    +        "--no-skip-internal-links",
    +        action="store_false",
    +        dest="skip_internal_links",
    +        default=config.SKIP_INTERNAL_LINKS,
    +        help="Do not skip internal links",
    +    )
    +    p.add_argument(
    +        "--links-after-para",
    +        action="store_true",
    +        dest="links_each_paragraph",
    +        default=config.LINKS_EACH_PARAGRAPH,
    +        help="Put links after each paragraph instead of document",
    +    )
    +    p.add_argument(
    +        "--mark-code",
    +        action="store_true",
    +        dest="mark_code",
    +        default=config.MARK_CODE,
    +        help="Mark program code blocks with [code]...[/code]",
    +    )
    +    p.add_argument(
    +        "--decode-errors",
    +        dest="decode_errors",
    +        default=config.DECODE_ERRORS,
    +        help=(
    +            "What to do in case of decode errors.'ignore', 'strict' and 'replace' are "
    +            "acceptable values"
    +        ),
    +    )
    +    p.add_argument(
    +        "--open-quote",
    +        dest="open_quote",
    +        default=config.OPEN_QUOTE,
    +        help="The character used to open quotes",
    +    )
    +    p.add_argument(
    +        "--close-quote",
    +        dest="close_quote",
    +        default=config.CLOSE_QUOTE,
    +        help="The character used to close quotes",
    +    )
    +    p.add_argument(
    +        "--version", action="version", version=".".join(map(str, __version__))
    +    )
    +    p.add_argument("filename", nargs="?")
    +    p.add_argument("encoding", nargs="?", default="utf-8")
    +    p.add_argument(
    +        "--include-sup-sub",
    +        dest="include_sup_sub",
    +        action="store_true",
    +        default=config.INCLUDE_SUP_SUB,
    +        help="Include the sup and sub tags",
    +    )
    +    args = p.parse_args()
    +
    +    if args.filename and args.filename != "-":
    +        with open(args.filename, "rb") as fp:
    +            data = fp.read()
    +    else:
    +        data = sys.stdin.buffer.read()
    +
    +    try:
    +        html = data.decode(args.encoding, args.decode_errors)
    +    except UnicodeDecodeError as err:
    +        warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
    +        warning += " Use the " + bcolors.OKGREEN
    +        warning += "--decode-errors=ignore" + bcolors.ENDC + " flag."
    +        print(warning)
    +        raise err
    +
    +    h = HTML2Text(baseurl=baseurl)
    +    # handle options
    +    if args.ul_style_dash:
    +        h.ul_item_mark = "-"
    +    if args.em_style_asterisk:
    +        h.emphasis_mark = "*"
    +        h.strong_mark = "__"
    +
    +    h.body_width = args.body_width
    +    h.google_list_indent = args.list_indent
    +    h.ignore_emphasis = args.ignore_emphasis
    +    h.ignore_links = args.ignore_links
    +    h.ignore_mailto_links = args.ignore_mailto_links
    +    h.protect_links = args.protect_links
    +    h.ignore_images = args.ignore_images
    +    h.images_as_html = args.images_as_html
    +    h.images_to_alt = args.images_to_alt
    +    h.images_with_size = args.images_with_size
    +    h.google_doc = args.google_doc
    +    h.hide_strikethrough = args.hide_strikethrough
    +    h.escape_snob = args.escape_snob
    +    h.bypass_tables = args.bypass_tables
    +    h.ignore_tables = args.ignore_tables
    +    h.single_line_break = args.single_line_break
    +    h.inline_links = args.inline_links
    +    h.unicode_snob = args.unicode_snob
    +    h.use_automatic_links = args.use_automatic_links
    +    h.skip_internal_links = args.skip_internal_links
    +    h.links_each_paragraph = args.links_each_paragraph
    +    h.mark_code = args.mark_code
    +    h.wrap_links = args.wrap_links
    +    h.wrap_list_items = args.wrap_list_items
    +    h.wrap_tables = args.wrap_tables
    +    h.pad_tables = args.pad_tables
    +    h.default_image_alt = args.default_image_alt
    +    h.open_quote = args.open_quote
    +    h.close_quote = args.close_quote
    +    h.include_sup_sub = args.include_sup_sub
    +
    +    sys.stdout.write(h.handle(html))
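
The CLI above is essentially a flag-to-attribute mapping onto `HTML2Text`. The same options can be set programmatically; a short sketch mirroring a few of the flags handled in `main()` (attribute names are taken from the assignments above, the sample HTML is illustrative):

```python
from crawl4ai.html2text import HTML2Text

h = HTML2Text(baseurl="")   # mirrors the construction in cli.main()
h.body_width = 0            # --body-width=0: disable line wrapping
h.ignore_links = True       # --ignore-links
h.ignore_images = True      # --ignore-images
h.mark_code = True          # --mark-code: wrap code blocks in [code]...[/code]
h.ul_item_mark = "-"        # -d / --dash-unordered-list

print(h.handle("<ul><li>alpha</li><li>beta</li></ul>"))
```
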
    diff --git a/crawl4ai/html2text/config.py b/crawl4ai/html2text/config.py
    new file mode 100644
    index 0000000..d14ed64
    --- /dev/null
    +++ b/crawl4ai/html2text/config.py
    @@ -0,0 +1,172 @@
    +import re
    +
    +# Use Unicode characters instead of their ascii pseudo-replacements
    +UNICODE_SNOB = False
    +
    +# Marker to use for marking tables for padding post processing
    +TABLE_MARKER_FOR_PAD = "special_marker_for_table_padding"
    +# Escape all special characters.  Output is less readable, but avoids
    +# corner case formatting issues.
    +ESCAPE_SNOB = False
    +ESCAPE_BACKSLASH = False
    +ESCAPE_DOT = False
    +ESCAPE_PLUS = False
    +ESCAPE_DASH = False
    +
    +# Put the links after each paragraph instead of at the end.
    +LINKS_EACH_PARAGRAPH = False
    +
    +# Wrap long lines at position. 0 for no wrapping.
    +BODY_WIDTH = 78
    +
    +# Don't show internal links (href="#local-anchor") -- corresponding link
    +# targets won't be visible in the plain text file anyway.
    +SKIP_INTERNAL_LINKS = True
    +
    +# Use inline, rather than reference, formatting for images and links
    +INLINE_LINKS = True
    +
    +# Protect links from line breaks surrounding them with angle brackets (in
    +# addition to their square brackets)
    +PROTECT_LINKS = False
    +# WRAP_LINKS = True
    +WRAP_LINKS = True
    +
    +# Wrap list items.
    +WRAP_LIST_ITEMS = False
    +
    +# Wrap tables
    +WRAP_TABLES = False
    +
    +# Number of pixels Google indents nested lists
    +GOOGLE_LIST_INDENT = 36
    +
    +# Values Google and others may use to indicate bold text
    +BOLD_TEXT_STYLE_VALUES = ("bold", "700", "800", "900")
    +
    +IGNORE_ANCHORS = False
    +IGNORE_MAILTO_LINKS = False
    +IGNORE_IMAGES = False
    +IMAGES_AS_HTML = False
    +IMAGES_TO_ALT = False
    +IMAGES_WITH_SIZE = False
    +IGNORE_EMPHASIS = False
    +MARK_CODE = False
    +DECODE_ERRORS = "strict"
    +DEFAULT_IMAGE_ALT = ""
    +PAD_TABLES = False
    +
+# Convert links with same href and text to <href> format
    +# if they are absolute links
    +USE_AUTOMATIC_LINKS = True
    +
    +# For checking space-only lines on line 771
    +RE_SPACE = re.compile(r"\s\+")
    +
    +RE_ORDERED_LIST_MATCHER = re.compile(r"\d+\.\s")
    +RE_UNORDERED_LIST_MATCHER = re.compile(r"[-\*\+]\s")
    +RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
    +RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
    +
    +# to find links in the text
    +RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
    +
    +# to find table separators
    +RE_TABLE = re.compile(r" \| ")
    +
    +RE_MD_DOT_MATCHER = re.compile(
    +    r"""
    +    ^             # start of line
    +    (\s*\d+)      # optional whitespace and a number
    +    (\.)          # dot
    +    (?=\s)        # lookahead assert whitespace
    +    """,
    +    re.MULTILINE | re.VERBOSE,
    +)
    +RE_MD_PLUS_MATCHER = re.compile(
    +    r"""
    +    ^
    +    (\s*)
    +    (\+)
    +    (?=\s)
    +    """,
    +    flags=re.MULTILINE | re.VERBOSE,
    +)
    +RE_MD_DASH_MATCHER = re.compile(
    +    r"""
    +    ^
    +    (\s*)
    +    (-)
    +    (?=\s|\-)     # followed by whitespace (bullet list, or spaced out hr)
    +                  # or another dash (header or hr)
    +    """,
    +    flags=re.MULTILINE | re.VERBOSE,
    +)
    +RE_SLASH_CHARS = r"\`*_{}[]()#+-.!"
    +RE_MD_BACKSLASH_MATCHER = re.compile(
    +    r"""
    +    (\\)          # match one slash
    +    (?=[%s])      # followed by a char that requires escaping
    +    """
    +    % re.escape(RE_SLASH_CHARS),
    +    flags=re.VERBOSE,
    +)
    +
    +UNIFIABLE = {
    +    "rsquo": "'",
    +    "lsquo": "'",
    +    "rdquo": '"',
    +    "ldquo": '"',
    +    "copy": "(C)",
    +    "mdash": "--",
    +    "nbsp": " ",
    +    "rarr": "->",
    +    "larr": "<-",
    +    "middot": "*",
    +    "ndash": "-",
    +    "oelig": "oe",
    +    "aelig": "ae",
    +    "agrave": "a",
    +    "aacute": "a",
    +    "acirc": "a",
    +    "atilde": "a",
    +    "auml": "a",
    +    "aring": "a",
    +    "egrave": "e",
    +    "eacute": "e",
    +    "ecirc": "e",
    +    "euml": "e",
    +    "igrave": "i",
    +    "iacute": "i",
    +    "icirc": "i",
    +    "iuml": "i",
    +    "ograve": "o",
    +    "oacute": "o",
    +    "ocirc": "o",
    +    "otilde": "o",
    +    "ouml": "o",
    +    "ugrave": "u",
    +    "uacute": "u",
    +    "ucirc": "u",
    +    "uuml": "u",
    +    "lrm": "",
    +    "rlm": "",
    +}
    +
    +# Format tables in HTML rather than Markdown syntax
    +BYPASS_TABLES = False
    +# Ignore table-related tags (table, th, td, tr) while keeping rows
    +IGNORE_TABLES = False
    +
    +
    +# Use a single line break after a block element rather than two line breaks.
    +# NOTE: Requires body width setting to be 0.
    +SINGLE_LINE_BREAK = False
    +
    +
+# Use double quotation marks when converting the <q> tag.
    +OPEN_QUOTE = '"'
    +CLOSE_QUOTE = '"'
    +
+# Include the <sup> and <sub> tags
    +INCLUDE_SUP_SUB = False
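
These module-level constants are the defaults shared by `html2text()`, the `HTML2Text` class, and the CLI. A small sketch of how they interact with per-call overrides (the printed values are the defaults defined above):

```python
from crawl4ai.html2text import config, html2text

print(config.BODY_WIDTH)          # 78: wrap width used when bodywidth is not passed
print(config.UNIFIABLE["mdash"])  # "--": ASCII fallback unless unicode_snob is enabled

# An explicit bodywidth overrides config.BODY_WIDTH for this call only.
print(html2text("<p>" + "word " * 40 + "</p>", bodywidth=0))
```
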
    diff --git a/crawl4ai/html2text/elements.py b/crawl4ai/html2text/elements.py
    new file mode 100644
    index 0000000..2533ec0
    --- /dev/null
    +++ b/crawl4ai/html2text/elements.py
    @@ -0,0 +1,18 @@
    +from typing import Dict, Optional
    +
    +
    +class AnchorElement:
    +    __slots__ = ["attrs", "count", "outcount"]
    +
    +    def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
    +        self.attrs = attrs
    +        self.count = count
    +        self.outcount = outcount
    +
    +
    +class ListElement:
    +    __slots__ = ["name", "num"]
    +
    +    def __init__(self, name: str, num: int):
    +        self.name = name
    +        self.num = num
    diff --git a/crawl4ai/html2text/utils.py b/crawl4ai/html2text/utils.py
    new file mode 100644
    index 0000000..1909d2c
    --- /dev/null
    +++ b/crawl4ai/html2text/utils.py
    @@ -0,0 +1,303 @@
    +import html.entities
    +from typing import Dict, List, Optional
    +
    +from . import config
    +
    +unifiable_n = {
    +    html.entities.name2codepoint[k]: v
    +    for k, v in config.UNIFIABLE.items()
    +    if k != "nbsp"
    +}
    +
    +
    +def hn(tag: str) -> int:
    +    if tag[0] == "h" and len(tag) == 2:
    +        n = tag[1]
    +        if "0" < n <= "9":
    +            return int(n)
    +    return 0
    +
    +
    +def dumb_property_dict(style: str) -> Dict[str, str]:
    +    """
    +    :returns: A hash of css attributes
    +    """
    +    return {
    +        x.strip().lower(): y.strip().lower()
    +        for x, y in [z.split(":", 1) for z in style.split(";") if ":" in z]
    +    }
    +
    +
    +def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
    +    """
    +    :type data: str
    +
    +    :returns: A hash of css selectors, each of which contains a hash of
    +    css attributes.
    +    :rtype: dict
    +    """
    +    # remove @import sentences
    +    data += ";"
    +    importIndex = data.find("@import")
    +    while importIndex != -1:
    +        data = data[0:importIndex] + data[data.find(";", importIndex) + 1 :]
    +        importIndex = data.find("@import")
    +
    +    # parse the css. reverted from dictionary comprehension in order to
    +    # support older pythons
    +    pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
    +    try:
    +        elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
    +    except ValueError:
    +        elements = {}  # not that important
    +
    +    return elements
    +
    +
    +def element_style(
    +    attrs: Dict[str, Optional[str]],
    +    style_def: Dict[str, Dict[str, str]],
    +    parent_style: Dict[str, str],
    +) -> Dict[str, str]:
    +    """
    +    :type attrs: dict
    +    :type style_def: dict
+    :type parent_style: dict
    +
    +    :returns: A hash of the 'final' style attributes of the element
    +    :rtype: dict
    +    """
    +    style = parent_style.copy()
    +    if "class" in attrs:
    +        assert attrs["class"] is not None
    +        for css_class in attrs["class"].split():
    +            css_style = style_def.get("." + css_class, {})
    +            style.update(css_style)
    +    if "style" in attrs:
    +        assert attrs["style"] is not None
    +        immediate_style = dumb_property_dict(attrs["style"])
    +        style.update(immediate_style)
    +
    +    return style
    +
    +
    +def google_list_style(style: Dict[str, str]) -> str:
    +    """
    +    Finds out whether this is an ordered or unordered list
    +
    +    :type style: dict
    +
    +    :rtype: str
    +    """
    +    if "list-style-type" in style:
    +        list_style = style["list-style-type"]
    +        if list_style in ["disc", "circle", "square", "none"]:
    +            return "ul"
    +
    +    return "ol"
    +
    +
    +def google_has_height(style: Dict[str, str]) -> bool:
    +    """
    +    Check if the style of the element has the 'height' attribute
    +    explicitly defined
    +
    +    :type style: dict
    +
    +    :rtype: bool
    +    """
    +    return "height" in style
    +
    +
    +def google_text_emphasis(style: Dict[str, str]) -> List[str]:
    +    """
    +    :type style: dict
    +
    +    :returns: A list of all emphasis modifiers of the element
    +    :rtype: list
    +    """
    +    emphasis = []
    +    if "text-decoration" in style:
    +        emphasis.append(style["text-decoration"])
    +    if "font-style" in style:
    +        emphasis.append(style["font-style"])
    +    if "font-weight" in style:
    +        emphasis.append(style["font-weight"])
    +
    +    return emphasis
    +
    +
    +def google_fixed_width_font(style: Dict[str, str]) -> bool:
    +    """
    +    Check if the css of the current element defines a fixed width font
    +
    +    :type style: dict
    +
    +    :rtype: bool
    +    """
    +    font_family = ""
    +    if "font-family" in style:
    +        font_family = style["font-family"]
    +    return "courier new" == font_family or "consolas" == font_family
    +
    +
    +def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
    +    """
    +    Extract numbering from list element attributes
    +
    +    :type attrs: dict
    +
    +    :rtype: int or None
    +    """
    +    if "start" in attrs:
    +        assert attrs["start"] is not None
    +        try:
    +            return int(attrs["start"]) - 1
    +        except ValueError:
    +            pass
    +
    +    return 0
    +
    +
    +def skipwrap(
    +    para: str, wrap_links: bool, wrap_list_items: bool, wrap_tables: bool
    +) -> bool:
    +    # If it appears to contain a link
    +    # don't wrap
    +    if not wrap_links and config.RE_LINK.search(para):
    +        return True
    +    # If the text begins with four spaces or one tab, it's a code block;
    +    # don't wrap
    +    if para[0:4] == "    " or para[0] == "\t":
    +        return True
    +
    +    # If the text begins with only two "--", possibly preceded by
    +    # whitespace, that's an emdash; so wrap.
    +    stripped = para.lstrip()
    +    if stripped[0:2] == "--" and len(stripped) > 2 and stripped[2] != "-":
    +        return False
    +
    +    # I'm not sure what this is for; I thought it was to detect lists,
+    # but there's a <br/>-inside-<span> case in one of the tests that
+    # also depends upon it.
+    if stripped[0:1] in ("-", "*") and not stripped[0:2] == "**":
+        return not wrap_list_items
+
+    # If text contains a pipe character it is likely a table
+    if not wrap_tables and config.RE_TABLE.search(para):
+        return True
+
+    # If the text begins with a single -, *, or +, followed by a space,
+    # or an integer, followed by a ., followed by a space (in either
+    # case optionally proceeded by whitespace), it's a list; don't wrap.
+    return bool(
+        config.RE_ORDERED_LIST_MATCHER.match(stripped)
+        or config.RE_UNORDERED_LIST_MATCHER.match(stripped)
+    )
+
+
+def escape_md(text: str) -> str:
+    """
+    Escapes markdown-sensitive characters within other markdown
+    constructs.
+    """
+    return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
+
+
+def escape_md_section(
+    text: str,
+    escape_backslash: bool = True,
+    snob: bool = False,
+    escape_dot: bool = True,
+    escape_plus: bool = True,
+    escape_dash: bool = True
+) -> str:
+    """
+    Escapes markdown-sensitive characters across whole document sections.
+    Each escaping operation can be controlled individually.
+    """
+    if escape_backslash:
+        text = config.RE_MD_BACKSLASH_MATCHER.sub(r"\\\1", text)
+
+    if snob:
+        text = config.RE_MD_CHARS_MATCHER_ALL.sub(r"\\\1", text)
+
+    if escape_dot:
+        text = config.RE_MD_DOT_MATCHER.sub(r"\1\\\2", text)
+
+    if escape_plus:
+        text = config.RE_MD_PLUS_MATCHER.sub(r"\1\\\2", text)
+
+    if escape_dash:
+        text = config.RE_MD_DASH_MATCHER.sub(r"\1\\\2", text)
+
+    return text
+
+def reformat_table(lines: List[str], right_margin: int) -> List[str]:
+    """
+    Given the lines of a table
+    padds the cells and returns the new lines
+    """
+    # find the maximum width of the columns
+    max_width = [len(x.rstrip()) + right_margin for x in lines[0].split("|")]
+    max_cols = len(max_width)
+    for line in lines:
+        cols = [x.rstrip() for x in line.split("|")]
+        num_cols = len(cols)
+
+        # don't drop any data if colspan attributes result in unequal lengths
+        if num_cols < max_cols:
+            cols += [""] * (max_cols - num_cols)
+        elif max_cols < num_cols:
+            max_width += [len(x) + right_margin for x in cols[-(num_cols - max_cols) :]]
+            max_cols = num_cols
+
+        max_width = [
+            max(len(x) + right_margin, old_len) for x, old_len in zip(cols, max_width)
+        ]
+
+    # reformat
+    new_lines = []
+    for line in lines:
+        cols = [x.rstrip() for x in line.split("|")]
+        if set(line.strip()) == set("-|"):
+            filler = "-"
+            new_cols = [
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
+            ]
+            new_lines.append("|-" + "|".join(new_cols) + "|")
+        else:
+            filler = " "
+            new_cols = [
+                x.rstrip() + (filler * (M - len(x.rstrip())))
+                for x, M in zip(cols, max_width)
+            ]
+            new_lines.append("| " + "|".join(new_cols) + "|")
+    return new_lines
+
+
+def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
+    """
+    Provide padding for tables in the text
+    """
+    lines = text.split("\n")
+    table_buffer = []  # type: List[str]
+    table_started = False
+    new_lines = []
+    for line in lines:
+        # Toggle table started
+        if config.TABLE_MARKER_FOR_PAD in line:
+            table_started = not table_started
+            if not table_started:
+                table = reformat_table(table_buffer, right_margin)
+                new_lines.extend(table)
+                table_buffer = []
+                new_lines.append("")
+            continue
+        # Process lines
+        if table_started:
+            table_buffer.append(line)
+        else:
+            new_lines.append(line)
+    return "\n".join(new_lines)
diff --git a/crawl4ai/markdown_generation_strategy.py b/crawl4ai/markdown_generation_strategy.py
new file mode 100644
index 0000000..7922c41 --- /dev/null +++ b/crawl4ai/markdown_generation_strategy.py @@ -0,0 +1,116 @@ +from abc import ABC, abstractmethod +from typing import Optional, Dict, Any, Tuple +from .models import MarkdownGenerationResult +from .utils import CustomHTML2Text +from .content_filter_strategy import RelevantContentFilter, BM25ContentFilter +import re +from urllib.parse import urljoin + +# Pre-compile the regex pattern +LINK_PATTERN = re.compile(r'!?\[([^\]]+)\]\(([^)]+?)(?:\s+"([^"]*)")?\)') + +class MarkdownGenerationStrategy(ABC): + """Abstract base class for markdown generation strategies.""" + + @abstractmethod + def generate_markdown(self, + cleaned_html: str, + base_url: str = "", + html2text_options: Optional[Dict[str, Any]] = None, + content_filter: Optional[RelevantContentFilter] = None, + citations: bool = True, + **kwargs) -> MarkdownGenerationResult: + """Generate markdown from cleaned HTML.""" + pass + +class DefaultMarkdownGenerationStrategy(MarkdownGenerationStrategy): + """Default implementation of markdown generation strategy.""" + + def convert_links_to_citations(self, markdown: str, base_url: str = "") -> Tuple[str, str]: + link_map = {} + url_cache = {} # Cache for URL joins + parts = [] + last_end = 0 + counter = 1 + + for match in LINK_PATTERN.finditer(markdown): + parts.append(markdown[last_end:match.start()]) + text, url, title = match.groups() + + # Use cached URL if available, otherwise compute and cache + if base_url and not url.startswith(('http://', 'https://', 'mailto:')): + if url not in url_cache: + url_cache[url] = fast_urljoin(base_url, url) + url = url_cache[url] + + if url not in link_map: + desc = [] + if title: desc.append(title) + if text and text != title: desc.append(text) + link_map[url] = (counter, ": " + " - ".join(desc) if desc else "") + counter += 1 + + num = link_map[url][0] + parts.append(f"{text}⟨{num}⟩" if not match.group(0).startswith('!') else f"![{text}⟨{num}⟩]") + last_end = match.end() + + parts.append(markdown[last_end:]) + converted_text = ''.join(parts) + + # Pre-build reference strings + references = ["\n\n## References\n\n"] + references.extend( + f"⟨{num}⟩ {url}{desc}\n" + for url, (num, desc) in sorted(link_map.items(), key=lambda x: x[1][0]) + ) + + return converted_text, ''.join(references) + + def generate_markdown(self, + cleaned_html: str, + base_url: str = "", + html2text_options: Optional[Dict[str, Any]] = None, + content_filter: Optional[RelevantContentFilter] = None, + citations: bool = True, + **kwargs) -> MarkdownGenerationResult: + """Generate markdown with citations from cleaned HTML.""" + # Initialize HTML2Text with options + h = CustomHTML2Text() + if html2text_options: + h.update_params(**html2text_options) + + # Generate raw markdown + raw_markdown = h.handle(cleaned_html) + raw_markdown = raw_markdown.replace(' ```', '```') + + # Convert links to citations + if citations: + markdown_with_citations, references_markdown = self.convert_links_to_citations( + raw_markdown, base_url + ) + + # Generate fit markdown if content filter is provided + fit_markdown: Optional[str] = None + if content_filter: + filtered_html = content_filter.filter_content(cleaned_html) + filtered_html = '\n'.join('
    {}
    '.format(s) for s in filtered_html) + fit_markdown = h.handle(filtered_html) + + return MarkdownGenerationResult( + raw_markdown=raw_markdown, + markdown_with_citations=markdown_with_citations, + references_markdown=references_markdown, + fit_markdown=fit_markdown, + fit_html=filtered_html + ) + +def fast_urljoin(base: str, url: str) -> str: + """Fast URL joining for common cases.""" + if url.startswith(('http://', 'https://', 'mailto:', '//')): + return url + if url.startswith('/'): + # Handle absolute paths + if base.endswith('/'): + return base[:-1] + url + return base + url + return urljoin(base, url) \ No newline at end of file diff --git a/crawl4ai/migrations.py b/crawl4ai/migrations.py new file mode 100644 index 0000000..7761608 --- /dev/null +++ b/crawl4ai/migrations.py @@ -0,0 +1,152 @@ +import os +import asyncio +import logging +from pathlib import Path +import aiosqlite +from typing import Optional +import xxhash +import aiofiles +import shutil +import time +from datetime import datetime + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +class DatabaseMigration: + def __init__(self, db_path: str): + self.db_path = db_path + self.content_paths = self._ensure_content_dirs(os.path.dirname(db_path)) + + def _ensure_content_dirs(self, base_path: str) -> dict: + dirs = { + 'html': 'html_content', + 'cleaned': 'cleaned_html', + 'markdown': 'markdown_content', + 'extracted': 'extracted_content', + 'screenshots': 'screenshots' + } + content_paths = {} + for key, dirname in dirs.items(): + path = os.path.join(base_path, dirname) + os.makedirs(path, exist_ok=True) + content_paths[key] = path + return content_paths + + def _generate_content_hash(self, content: str) -> str: + x = xxhash.xxh64() + x.update(content.encode()) + content_hash = x.hexdigest() + return content_hash + # return hashlib.sha256(content.encode()).hexdigest() + + async def _store_content(self, content: str, content_type: str) -> str: + if not content: + return "" + + content_hash = self._generate_content_hash(content) + file_path = os.path.join(self.content_paths[content_type], content_hash) + + if not os.path.exists(file_path): + async with aiofiles.open(file_path, 'w', encoding='utf-8') as f: + await f.write(content) + + return content_hash + + async def migrate_database(self): + """Migrate existing database to file-based storage""" + logger.info("Starting database migration...") + + try: + async with aiosqlite.connect(self.db_path) as db: + # Get all rows + async with db.execute( + '''SELECT url, html, cleaned_html, markdown, + extracted_content, screenshot FROM crawled_data''' + ) as cursor: + rows = await cursor.fetchall() + + migrated_count = 0 + for row in rows: + url, html, cleaned_html, markdown, extracted_content, screenshot = row + + # Store content in files and get hashes + html_hash = await self._store_content(html, 'html') + cleaned_hash = await self._store_content(cleaned_html, 'cleaned') + markdown_hash = await self._store_content(markdown, 'markdown') + extracted_hash = await self._store_content(extracted_content, 'extracted') + screenshot_hash = await self._store_content(screenshot, 'screenshots') + + # Update database with hashes + await db.execute(''' + UPDATE crawled_data + SET html = ?, + cleaned_html = ?, + markdown = ?, + extracted_content = ?, + screenshot = ? + WHERE url = ? 
+ ''', (html_hash, cleaned_hash, markdown_hash, + extracted_hash, screenshot_hash, url)) + + migrated_count += 1 + if migrated_count % 100 == 0: + logger.info(f"Migrated {migrated_count} records...") + + await db.commit() + logger.info(f"Migration completed. {migrated_count} records processed.") + + except Exception as e: + logger.error(f"Migration failed: {e}") + raise + +async def backup_database(db_path: str) -> str: + """Create backup of existing database""" + if not os.path.exists(db_path): + logger.info("No existing database found. Skipping backup.") + return None + + # Create backup with timestamp + timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + backup_path = f"{db_path}.backup_{timestamp}" + + try: + # Wait for any potential write operations to finish + await asyncio.sleep(1) + + # Create backup + shutil.copy2(db_path, backup_path) + logger.info(f"Database backup created at: {backup_path}") + return backup_path + except Exception as e: + logger.error(f"Backup failed: {e}") + raise + +async def run_migration(db_path: Optional[str] = None): + """Run database migration""" + if db_path is None: + db_path = os.path.join(Path.home(), ".crawl4ai", "crawl4ai.db") + + if not os.path.exists(db_path): + logger.info("No existing database found. Skipping migration.") + return + + # Create backup first + backup_path = await backup_database(db_path) + if not backup_path: + return + + migration = DatabaseMigration(db_path) + await migration.migrate_database() + +def main(): + """CLI entry point for migration""" + import argparse + parser = argparse.ArgumentParser(description='Migrate Crawl4AI database to file-based storage') + parser.add_argument('--db-path', help='Custom database path') + args = parser.parse_args() + + asyncio.run(run_migration(args.db_path)) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/crawl4ai/model_loader.py b/crawl4ai/model_loader.py index e33e53f..d1872d7 100644 --- a/crawl4ai/model_loader.py +++ b/crawl4ai/model_loader.py @@ -56,7 +56,7 @@ def set_model_device(model): @lru_cache() def get_home_folder(): - home_folder = os.path.join(Path.home(), ".crawl4ai") + home_folder = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai") os.makedirs(home_folder, exist_ok=True) os.makedirs(f"{home_folder}/cache", exist_ok=True) os.makedirs(f"{home_folder}/models", exist_ok=True) @@ -72,10 +72,18 @@ def load_bert_base_uncased(): return tokenizer, model @lru_cache() -def load_bge_small_en_v1_5(): +def load_HF_embedding_model(model_name="BAAI/bge-small-en-v1.5") -> tuple: + """Load the Hugging Face model for embedding. + + Args: + model_name (str, optional): The model name to load. Defaults to "BAAI/bge-small-en-v1.5". + + Returns: + tuple: The tokenizer and model. 
+ """ from transformers import BertTokenizer, BertModel, AutoTokenizer, AutoModel - tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) - model = AutoModel.from_pretrained('BAAI/bge-small-en-v1.5', resume_download=None) + tokenizer = AutoTokenizer.from_pretrained(model_name, resume_download=None) + model = AutoModel.from_pretrained(model_name, resume_download=None) model.eval() model, device = set_model_device(model) return tokenizer, model diff --git a/crawl4ai/models.py b/crawl4ai/models.py index 151ccb4..3a1b8bd 100644 --- a/crawl4ai/models.py +++ b/crawl4ai/models.py @@ -1,10 +1,19 @@ from pydantic import BaseModel, HttpUrl -from typing import List, Dict, Optional +from typing import List, Dict, Optional, Callable, Awaitable, Union + + class UrlModel(BaseModel): url: HttpUrl forced: bool = False +class MarkdownGenerationResult(BaseModel): + raw_markdown: str + markdown_with_citations: str + references_markdown: str + fit_markdown: Optional[str] = None + fit_html: Optional[str] = None + class CrawlResult(BaseModel): url: str html: str @@ -12,11 +21,28 @@ class CrawlResult(BaseModel): cleaned_html: Optional[str] = None media: Dict[str, List[Dict]] = {} links: Dict[str, List[Dict]] = {} + downloaded_files: Optional[List[str]] = None screenshot: Optional[str] = None - markdown: Optional[str] = None + markdown: Optional[Union[str, MarkdownGenerationResult]] = None + markdown_v2: Optional[MarkdownGenerationResult] = None + fit_markdown: Optional[str] = None + fit_html: Optional[str] = None extracted_content: Optional[str] = None metadata: Optional[dict] = None error_message: Optional[str] = None session_id: Optional[str] = None response_headers: Optional[dict] = None - status_code: Optional[int] = None \ No newline at end of file + status_code: Optional[int] = None + +class AsyncCrawlResponse(BaseModel): + html: str + response_headers: Dict[str, str] + status_code: int + screenshot: Optional[str] = None + get_delayed_content: Optional[Callable[[Optional[float]], Awaitable[str]]] = None + downloaded_files: Optional[List[str]] = None + + class Config: + arbitrary_types_allowed = True + + diff --git a/crawl4ai/models/onnx/config.json b/crawl4ai/models/onnx/config.json deleted file mode 100644 index 6171b91..0000000 --- a/crawl4ai/models/onnx/config.json +++ /dev/null @@ -1,25 +0,0 @@ -{ - "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2", - "architectures": [ - "BertModel" - ], - "attention_probs_dropout_prob": 0.1, - "classifier_dropout": null, - "gradient_checkpointing": false, - "hidden_act": "gelu", - "hidden_dropout_prob": 0.1, - "hidden_size": 384, - "initializer_range": 0.02, - "intermediate_size": 1536, - "layer_norm_eps": 1e-12, - "max_position_embeddings": 512, - "model_type": "bert", - "num_attention_heads": 12, - "num_hidden_layers": 6, - "pad_token_id": 0, - "position_embedding_type": "absolute", - "transformers_version": "4.27.4", - "type_vocab_size": 2, - "use_cache": true, - "vocab_size": 30522 -} diff --git a/crawl4ai/models/onnx/model.onnx b/crawl4ai/models/onnx/model.onnx deleted file mode 100644 index 9117c9b..0000000 Binary files a/crawl4ai/models/onnx/model.onnx and /dev/null differ diff --git a/crawl4ai/models/onnx/special_tokens_map.json b/crawl4ai/models/onnx/special_tokens_map.json deleted file mode 100644 index a8b3208..0000000 --- a/crawl4ai/models/onnx/special_tokens_map.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "cls_token": "[CLS]", - "mask_token": "[MASK]", - "pad_token": "[PAD]", - "sep_token": "[SEP]", - "unk_token": 
"[UNK]" -} diff --git a/crawl4ai/models/onnx/tokenizer.json b/crawl4ai/models/onnx/tokenizer.json deleted file mode 100644 index c17ed52..0000000 --- a/crawl4ai/models/onnx/tokenizer.json +++ /dev/null @@ -1,30686 +0,0 @@ -{ - "version": "1.0", - "truncation": { - "direction": "Right", - "max_length": 128, - "strategy": "LongestFirst", - "stride": 0 - }, - "padding": { - "strategy": { - "Fixed": 128 - }, - "direction": "Right", - "pad_to_multiple_of": null, - "pad_id": 0, - "pad_type_id": 0, - "pad_token": "[PAD]" - }, - "added_tokens": [ - { - "id": 0, - "content": "[PAD]", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false, - "special": true - }, - { - "id": 100, - "content": "[UNK]", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false, - "special": true - }, - { - "id": 101, - "content": "[CLS]", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false, - "special": true - }, - { - "id": 102, - "content": "[SEP]", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false, - "special": true - }, - { - "id": 103, - "content": "[MASK]", - "single_word": false, - "lstrip": false, - "rstrip": false, - "normalized": false, - "special": true - } - ], - "normalizer": { - "type": "BertNormalizer", - "clean_text": true, - "handle_chinese_chars": true, - "strip_accents": null, - "lowercase": true - }, - "pre_tokenizer": { - "type": "BertPreTokenizer" - }, - "post_processor": { - "type": "TemplateProcessing", - "single": [ - { - "SpecialToken": { - "id": "[CLS]", - "type_id": 0 - } - }, - { - "Sequence": { - "id": "A", - "type_id": 0 - } - }, - { - "SpecialToken": { - "id": "[SEP]", - "type_id": 0 - } - } - ], - "pair": [ - { - "SpecialToken": { - "id": "[CLS]", - "type_id": 0 - } - }, - { - "Sequence": { - "id": "A", - "type_id": 0 - } - }, - { - "SpecialToken": { - "id": "[SEP]", - "type_id": 0 - } - }, - { - "Sequence": { - "id": "B", - "type_id": 1 - } - }, - { - "SpecialToken": { - "id": "[SEP]", - "type_id": 1 - } - } - ], - "special_tokens": { - "[CLS]": { - "id": "[CLS]", - "ids": [ - 101 - ], - "tokens": [ - "[CLS]" - ] - }, - "[SEP]": { - "id": "[SEP]", - "ids": [ - 102 - ], - "tokens": [ - "[SEP]" - ] - } - } - }, - "decoder": { - "type": "WordPiece", - "prefix": "##", - "cleanup": true - }, - "model": { - "type": "WordPiece", - "unk_token": "[UNK]", - "continuing_subword_prefix": "##", - "max_input_chars_per_word": 100, - "vocab": { - "[PAD]": 0, - "[unused0]": 1, - "[unused1]": 2, - "[unused2]": 3, - "[unused3]": 4, - "[unused4]": 5, - "[unused5]": 6, - "[unused6]": 7, - "[unused7]": 8, - "[unused8]": 9, - "[unused9]": 10, - "[unused10]": 11, - "[unused11]": 12, - "[unused12]": 13, - "[unused13]": 14, - "[unused14]": 15, - "[unused15]": 16, - "[unused16]": 17, - "[unused17]": 18, - "[unused18]": 19, - "[unused19]": 20, - "[unused20]": 21, - "[unused21]": 22, - "[unused22]": 23, - "[unused23]": 24, - "[unused24]": 25, - "[unused25]": 26, - "[unused26]": 27, - "[unused27]": 28, - "[unused28]": 29, - "[unused29]": 30, - "[unused30]": 31, - "[unused31]": 32, - "[unused32]": 33, - "[unused33]": 34, - "[unused34]": 35, - "[unused35]": 36, - "[unused36]": 37, - "[unused37]": 38, - "[unused38]": 39, - "[unused39]": 40, - "[unused40]": 41, - "[unused41]": 42, - "[unused42]": 43, - "[unused43]": 44, - "[unused44]": 45, - "[unused45]": 46, - "[unused46]": 47, - "[unused47]": 48, - "[unused48]": 49, - "[unused49]": 50, - "[unused50]": 51, - "[unused51]": 52, - "[unused52]": 53, - 
"[unused53]": 54, - "[unused54]": 55, - "[unused55]": 56, - "[unused56]": 57, - "[unused57]": 58, - "[unused58]": 59, - "[unused59]": 60, - "[unused60]": 61, - "[unused61]": 62, - "[unused62]": 63, - "[unused63]": 64, - "[unused64]": 65, - "[unused65]": 66, - "[unused66]": 67, - "[unused67]": 68, - "[unused68]": 69, - "[unused69]": 70, - "[unused70]": 71, - "[unused71]": 72, - "[unused72]": 73, - "[unused73]": 74, - "[unused74]": 75, - "[unused75]": 76, - "[unused76]": 77, - "[unused77]": 78, - "[unused78]": 79, - "[unused79]": 80, - "[unused80]": 81, - "[unused81]": 82, - "[unused82]": 83, - "[unused83]": 84, - "[unused84]": 85, - "[unused85]": 86, - "[unused86]": 87, - "[unused87]": 88, - "[unused88]": 89, - "[unused89]": 90, - "[unused90]": 91, - "[unused91]": 92, - "[unused92]": 93, - "[unused93]": 94, - "[unused94]": 95, - "[unused95]": 96, - "[unused96]": 97, - "[unused97]": 98, - "[unused98]": 99, - "[UNK]": 100, - "[CLS]": 101, - "[SEP]": 102, - "[MASK]": 103, - "[unused99]": 104, - "[unused100]": 105, - "[unused101]": 106, - "[unused102]": 107, - "[unused103]": 108, - "[unused104]": 109, - "[unused105]": 110, - "[unused106]": 111, - "[unused107]": 112, - "[unused108]": 113, - "[unused109]": 114, - "[unused110]": 115, - "[unused111]": 116, - "[unused112]": 117, - "[unused113]": 118, - "[unused114]": 119, - "[unused115]": 120, - "[unused116]": 121, - "[unused117]": 122, - "[unused118]": 123, - "[unused119]": 124, - "[unused120]": 125, - "[unused121]": 126, - "[unused122]": 127, - "[unused123]": 128, - "[unused124]": 129, - "[unused125]": 130, - "[unused126]": 131, - "[unused127]": 132, - "[unused128]": 133, - "[unused129]": 134, - "[unused130]": 135, - "[unused131]": 136, - "[unused132]": 137, - "[unused133]": 138, - "[unused134]": 139, - "[unused135]": 140, - "[unused136]": 141, - "[unused137]": 142, - "[unused138]": 143, - "[unused139]": 144, - "[unused140]": 145, - "[unused141]": 146, - "[unused142]": 147, - "[unused143]": 148, - "[unused144]": 149, - "[unused145]": 150, - "[unused146]": 151, - "[unused147]": 152, - "[unused148]": 153, - "[unused149]": 154, - "[unused150]": 155, - "[unused151]": 156, - "[unused152]": 157, - "[unused153]": 158, - "[unused154]": 159, - "[unused155]": 160, - "[unused156]": 161, - "[unused157]": 162, - "[unused158]": 163, - "[unused159]": 164, - "[unused160]": 165, - "[unused161]": 166, - "[unused162]": 167, - "[unused163]": 168, - "[unused164]": 169, - "[unused165]": 170, - "[unused166]": 171, - "[unused167]": 172, - "[unused168]": 173, - "[unused169]": 174, - "[unused170]": 175, - "[unused171]": 176, - "[unused172]": 177, - "[unused173]": 178, - "[unused174]": 179, - "[unused175]": 180, - "[unused176]": 181, - "[unused177]": 182, - "[unused178]": 183, - "[unused179]": 184, - "[unused180]": 185, - "[unused181]": 186, - "[unused182]": 187, - "[unused183]": 188, - "[unused184]": 189, - "[unused185]": 190, - "[unused186]": 191, - "[unused187]": 192, - "[unused188]": 193, - "[unused189]": 194, - "[unused190]": 195, - "[unused191]": 196, - "[unused192]": 197, - "[unused193]": 198, - "[unused194]": 199, - "[unused195]": 200, - "[unused196]": 201, - "[unused197]": 202, - "[unused198]": 203, - "[unused199]": 204, - "[unused200]": 205, - "[unused201]": 206, - "[unused202]": 207, - "[unused203]": 208, - "[unused204]": 209, - "[unused205]": 210, - "[unused206]": 211, - "[unused207]": 212, - "[unused208]": 213, - "[unused209]": 214, - "[unused210]": 215, - "[unused211]": 216, - "[unused212]": 217, - "[unused213]": 218, - "[unused214]": 219, - "[unused215]": 
220, - "[unused216]": 221, - "[unused217]": 222, - "[unused218]": 223, - "[unused219]": 224, - "[unused220]": 225, - "[unused221]": 226, - "[unused222]": 227, - "[unused223]": 228, - "[unused224]": 229, - "[unused225]": 230, - "[unused226]": 231, - "[unused227]": 232, - "[unused228]": 233, - "[unused229]": 234, - "[unused230]": 235, - "[unused231]": 236, - "[unused232]": 237, - "[unused233]": 238, - "[unused234]": 239, - "[unused235]": 240, - "[unused236]": 241, - "[unused237]": 242, - "[unused238]": 243, - "[unused239]": 244, - "[unused240]": 245, - "[unused241]": 246, - "[unused242]": 247, - "[unused243]": 248, - "[unused244]": 249, - "[unused245]": 250, - "[unused246]": 251, - "[unused247]": 252, - "[unused248]": 253, - "[unused249]": 254, - "[unused250]": 255, - "[unused251]": 256, - "[unused252]": 257, - "[unused253]": 258, - "[unused254]": 259, - "[unused255]": 260, - "[unused256]": 261, - "[unused257]": 262, - "[unused258]": 263, - "[unused259]": 264, - "[unused260]": 265, - "[unused261]": 266, - "[unused262]": 267, - "[unused263]": 268, - "[unused264]": 269, - "[unused265]": 270, - "[unused266]": 271, - "[unused267]": 272, - "[unused268]": 273, - "[unused269]": 274, - "[unused270]": 275, - "[unused271]": 276, - "[unused272]": 277, - "[unused273]": 278, - "[unused274]": 279, - "[unused275]": 280, - "[unused276]": 281, - "[unused277]": 282, - "[unused278]": 283, - "[unused279]": 284, - "[unused280]": 285, - "[unused281]": 286, - "[unused282]": 287, - "[unused283]": 288, - "[unused284]": 289, - "[unused285]": 290, - "[unused286]": 291, - "[unused287]": 292, - "[unused288]": 293, - "[unused289]": 294, - "[unused290]": 295, - "[unused291]": 296, - "[unused292]": 297, - "[unused293]": 298, - "[unused294]": 299, - "[unused295]": 300, - "[unused296]": 301, - "[unused297]": 302, - "[unused298]": 303, - "[unused299]": 304, - "[unused300]": 305, - "[unused301]": 306, - "[unused302]": 307, - "[unused303]": 308, - "[unused304]": 309, - "[unused305]": 310, - "[unused306]": 311, - "[unused307]": 312, - "[unused308]": 313, - "[unused309]": 314, - "[unused310]": 315, - "[unused311]": 316, - "[unused312]": 317, - "[unused313]": 318, - "[unused314]": 319, - "[unused315]": 320, - "[unused316]": 321, - "[unused317]": 322, - "[unused318]": 323, - "[unused319]": 324, - "[unused320]": 325, - "[unused321]": 326, - "[unused322]": 327, - "[unused323]": 328, - "[unused324]": 329, - "[unused325]": 330, - "[unused326]": 331, - "[unused327]": 332, - "[unused328]": 333, - "[unused329]": 334, - "[unused330]": 335, - "[unused331]": 336, - "[unused332]": 337, - "[unused333]": 338, - "[unused334]": 339, - "[unused335]": 340, - "[unused336]": 341, - "[unused337]": 342, - "[unused338]": 343, - "[unused339]": 344, - "[unused340]": 345, - "[unused341]": 346, - "[unused342]": 347, - "[unused343]": 348, - "[unused344]": 349, - "[unused345]": 350, - "[unused346]": 351, - "[unused347]": 352, - "[unused348]": 353, - "[unused349]": 354, - "[unused350]": 355, - "[unused351]": 356, - "[unused352]": 357, - "[unused353]": 358, - "[unused354]": 359, - "[unused355]": 360, - "[unused356]": 361, - "[unused357]": 362, - "[unused358]": 363, - "[unused359]": 364, - "[unused360]": 365, - "[unused361]": 366, - "[unused362]": 367, - "[unused363]": 368, - "[unused364]": 369, - "[unused365]": 370, - "[unused366]": 371, - "[unused367]": 372, - "[unused368]": 373, - "[unused369]": 374, - "[unused370]": 375, - "[unused371]": 376, - "[unused372]": 377, - "[unused373]": 378, - "[unused374]": 379, - "[unused375]": 380, - "[unused376]": 381, - 
"[unused377]": 382, - "[unused378]": 383, - "[unused379]": 384, - "[unused380]": 385, - "[unused381]": 386, - "[unused382]": 387, - "[unused383]": 388, - "[unused384]": 389, - "[unused385]": 390, - "[unused386]": 391, - "[unused387]": 392, - "[unused388]": 393, - "[unused389]": 394, - "[unused390]": 395, - "[unused391]": 396, - "[unused392]": 397, - "[unused393]": 398, - "[unused394]": 399, - "[unused395]": 400, - "[unused396]": 401, - "[unused397]": 402, - "[unused398]": 403, - "[unused399]": 404, - "[unused400]": 405, - "[unused401]": 406, - "[unused402]": 407, - "[unused403]": 408, - "[unused404]": 409, - "[unused405]": 410, - "[unused406]": 411, - "[unused407]": 412, - "[unused408]": 413, - "[unused409]": 414, - "[unused410]": 415, - "[unused411]": 416, - "[unused412]": 417, - "[unused413]": 418, - "[unused414]": 419, - "[unused415]": 420, - "[unused416]": 421, - "[unused417]": 422, - "[unused418]": 423, - "[unused419]": 424, - "[unused420]": 425, - "[unused421]": 426, - "[unused422]": 427, - "[unused423]": 428, - "[unused424]": 429, - "[unused425]": 430, - "[unused426]": 431, - "[unused427]": 432, - "[unused428]": 433, - "[unused429]": 434, - "[unused430]": 435, - "[unused431]": 436, - "[unused432]": 437, - "[unused433]": 438, - "[unused434]": 439, - "[unused435]": 440, - "[unused436]": 441, - "[unused437]": 442, - "[unused438]": 443, - "[unused439]": 444, - "[unused440]": 445, - "[unused441]": 446, - "[unused442]": 447, - "[unused443]": 448, - "[unused444]": 449, - "[unused445]": 450, - "[unused446]": 451, - "[unused447]": 452, - "[unused448]": 453, - "[unused449]": 454, - "[unused450]": 455, - "[unused451]": 456, - "[unused452]": 457, - "[unused453]": 458, - "[unused454]": 459, - "[unused455]": 460, - "[unused456]": 461, - "[unused457]": 462, - "[unused458]": 463, - "[unused459]": 464, - "[unused460]": 465, - "[unused461]": 466, - "[unused462]": 467, - "[unused463]": 468, - "[unused464]": 469, - "[unused465]": 470, - "[unused466]": 471, - "[unused467]": 472, - "[unused468]": 473, - "[unused469]": 474, - "[unused470]": 475, - "[unused471]": 476, - "[unused472]": 477, - "[unused473]": 478, - "[unused474]": 479, - "[unused475]": 480, - "[unused476]": 481, - "[unused477]": 482, - "[unused478]": 483, - "[unused479]": 484, - "[unused480]": 485, - "[unused481]": 486, - "[unused482]": 487, - "[unused483]": 488, - "[unused484]": 489, - "[unused485]": 490, - "[unused486]": 491, - "[unused487]": 492, - "[unused488]": 493, - "[unused489]": 494, - "[unused490]": 495, - "[unused491]": 496, - "[unused492]": 497, - "[unused493]": 498, - "[unused494]": 499, - "[unused495]": 500, - "[unused496]": 501, - "[unused497]": 502, - "[unused498]": 503, - "[unused499]": 504, - "[unused500]": 505, - "[unused501]": 506, - "[unused502]": 507, - "[unused503]": 508, - "[unused504]": 509, - "[unused505]": 510, - "[unused506]": 511, - "[unused507]": 512, - "[unused508]": 513, - "[unused509]": 514, - "[unused510]": 515, - "[unused511]": 516, - "[unused512]": 517, - "[unused513]": 518, - "[unused514]": 519, - "[unused515]": 520, - "[unused516]": 521, - "[unused517]": 522, - "[unused518]": 523, - "[unused519]": 524, - "[unused520]": 525, - "[unused521]": 526, - "[unused522]": 527, - "[unused523]": 528, - "[unused524]": 529, - "[unused525]": 530, - "[unused526]": 531, - "[unused527]": 532, - "[unused528]": 533, - "[unused529]": 534, - "[unused530]": 535, - "[unused531]": 536, - "[unused532]": 537, - "[unused533]": 538, - "[unused534]": 539, - "[unused535]": 540, - "[unused536]": 541, - "[unused537]": 542, - 
"[unused538]": 543, - "[unused539]": 544, - "[unused540]": 545, - "[unused541]": 546, - "[unused542]": 547, - "[unused543]": 548, - "[unused544]": 549, - "[unused545]": 550, - "[unused546]": 551, - "[unused547]": 552, - "[unused548]": 553, - "[unused549]": 554, - "[unused550]": 555, - "[unused551]": 556, - "[unused552]": 557, - "[unused553]": 558, - "[unused554]": 559, - "[unused555]": 560, - "[unused556]": 561, - "[unused557]": 562, - "[unused558]": 563, - "[unused559]": 564, - "[unused560]": 565, - "[unused561]": 566, - "[unused562]": 567, - "[unused563]": 568, - "[unused564]": 569, - "[unused565]": 570, - "[unused566]": 571, - "[unused567]": 572, - "[unused568]": 573, - "[unused569]": 574, - "[unused570]": 575, - "[unused571]": 576, - "[unused572]": 577, - "[unused573]": 578, - "[unused574]": 579, - "[unused575]": 580, - "[unused576]": 581, - "[unused577]": 582, - "[unused578]": 583, - "[unused579]": 584, - "[unused580]": 585, - "[unused581]": 586, - "[unused582]": 587, - "[unused583]": 588, - "[unused584]": 589, - "[unused585]": 590, - "[unused586]": 591, - "[unused587]": 592, - "[unused588]": 593, - "[unused589]": 594, - "[unused590]": 595, - "[unused591]": 596, - "[unused592]": 597, - "[unused593]": 598, - "[unused594]": 599, - "[unused595]": 600, - "[unused596]": 601, - "[unused597]": 602, - "[unused598]": 603, - "[unused599]": 604, - "[unused600]": 605, - "[unused601]": 606, - "[unused602]": 607, - "[unused603]": 608, - "[unused604]": 609, - "[unused605]": 610, - "[unused606]": 611, - "[unused607]": 612, - "[unused608]": 613, - "[unused609]": 614, - "[unused610]": 615, - "[unused611]": 616, - "[unused612]": 617, - "[unused613]": 618, - "[unused614]": 619, - "[unused615]": 620, - "[unused616]": 621, - "[unused617]": 622, - "[unused618]": 623, - "[unused619]": 624, - "[unused620]": 625, - "[unused621]": 626, - "[unused622]": 627, - "[unused623]": 628, - "[unused624]": 629, - "[unused625]": 630, - "[unused626]": 631, - "[unused627]": 632, - "[unused628]": 633, - "[unused629]": 634, - "[unused630]": 635, - "[unused631]": 636, - "[unused632]": 637, - "[unused633]": 638, - "[unused634]": 639, - "[unused635]": 640, - "[unused636]": 641, - "[unused637]": 642, - "[unused638]": 643, - "[unused639]": 644, - "[unused640]": 645, - "[unused641]": 646, - "[unused642]": 647, - "[unused643]": 648, - "[unused644]": 649, - "[unused645]": 650, - "[unused646]": 651, - "[unused647]": 652, - "[unused648]": 653, - "[unused649]": 654, - "[unused650]": 655, - "[unused651]": 656, - "[unused652]": 657, - "[unused653]": 658, - "[unused654]": 659, - "[unused655]": 660, - "[unused656]": 661, - "[unused657]": 662, - "[unused658]": 663, - "[unused659]": 664, - "[unused660]": 665, - "[unused661]": 666, - "[unused662]": 667, - "[unused663]": 668, - "[unused664]": 669, - "[unused665]": 670, - "[unused666]": 671, - "[unused667]": 672, - "[unused668]": 673, - "[unused669]": 674, - "[unused670]": 675, - "[unused671]": 676, - "[unused672]": 677, - "[unused673]": 678, - "[unused674]": 679, - "[unused675]": 680, - "[unused676]": 681, - "[unused677]": 682, - "[unused678]": 683, - "[unused679]": 684, - "[unused680]": 685, - "[unused681]": 686, - "[unused682]": 687, - "[unused683]": 688, - "[unused684]": 689, - "[unused685]": 690, - "[unused686]": 691, - "[unused687]": 692, - "[unused688]": 693, - "[unused689]": 694, - "[unused690]": 695, - "[unused691]": 696, - "[unused692]": 697, - "[unused693]": 698, - "[unused694]": 699, - "[unused695]": 700, - "[unused696]": 701, - "[unused697]": 702, - "[unused698]": 703, - 
"[unused699]": 704, - "[unused700]": 705, - "[unused701]": 706, - "[unused702]": 707, - "[unused703]": 708, - "[unused704]": 709, - "[unused705]": 710, - "[unused706]": 711, - "[unused707]": 712, - "[unused708]": 713, - "[unused709]": 714, - "[unused710]": 715, - "[unused711]": 716, - "[unused712]": 717, - "[unused713]": 718, - "[unused714]": 719, - "[unused715]": 720, - "[unused716]": 721, - "[unused717]": 722, - "[unused718]": 723, - "[unused719]": 724, - "[unused720]": 725, - "[unused721]": 726, - "[unused722]": 727, - "[unused723]": 728, - "[unused724]": 729, - "[unused725]": 730, - "[unused726]": 731, - "[unused727]": 732, - "[unused728]": 733, - "[unused729]": 734, - "[unused730]": 735, - "[unused731]": 736, - "[unused732]": 737, - "[unused733]": 738, - "[unused734]": 739, - "[unused735]": 740, - "[unused736]": 741, - "[unused737]": 742, - "[unused738]": 743, - "[unused739]": 744, - "[unused740]": 745, - "[unused741]": 746, - "[unused742]": 747, - "[unused743]": 748, - "[unused744]": 749, - "[unused745]": 750, - "[unused746]": 751, - "[unused747]": 752, - "[unused748]": 753, - "[unused749]": 754, - "[unused750]": 755, - "[unused751]": 756, - "[unused752]": 757, - "[unused753]": 758, - "[unused754]": 759, - "[unused755]": 760, - "[unused756]": 761, - "[unused757]": 762, - "[unused758]": 763, - "[unused759]": 764, - "[unused760]": 765, - "[unused761]": 766, - "[unused762]": 767, - "[unused763]": 768, - "[unused764]": 769, - "[unused765]": 770, - "[unused766]": 771, - "[unused767]": 772, - "[unused768]": 773, - "[unused769]": 774, - "[unused770]": 775, - "[unused771]": 776, - "[unused772]": 777, - "[unused773]": 778, - "[unused774]": 779, - "[unused775]": 780, - "[unused776]": 781, - "[unused777]": 782, - "[unused778]": 783, - "[unused779]": 784, - "[unused780]": 785, - "[unused781]": 786, - "[unused782]": 787, - "[unused783]": 788, - "[unused784]": 789, - "[unused785]": 790, - "[unused786]": 791, - "[unused787]": 792, - "[unused788]": 793, - "[unused789]": 794, - "[unused790]": 795, - "[unused791]": 796, - "[unused792]": 797, - "[unused793]": 798, - "[unused794]": 799, - "[unused795]": 800, - "[unused796]": 801, - "[unused797]": 802, - "[unused798]": 803, - "[unused799]": 804, - "[unused800]": 805, - "[unused801]": 806, - "[unused802]": 807, - "[unused803]": 808, - "[unused804]": 809, - "[unused805]": 810, - "[unused806]": 811, - "[unused807]": 812, - "[unused808]": 813, - "[unused809]": 814, - "[unused810]": 815, - "[unused811]": 816, - "[unused812]": 817, - "[unused813]": 818, - "[unused814]": 819, - "[unused815]": 820, - "[unused816]": 821, - "[unused817]": 822, - "[unused818]": 823, - "[unused819]": 824, - "[unused820]": 825, - "[unused821]": 826, - "[unused822]": 827, - "[unused823]": 828, - "[unused824]": 829, - "[unused825]": 830, - "[unused826]": 831, - "[unused827]": 832, - "[unused828]": 833, - "[unused829]": 834, - "[unused830]": 835, - "[unused831]": 836, - "[unused832]": 837, - "[unused833]": 838, - "[unused834]": 839, - "[unused835]": 840, - "[unused836]": 841, - "[unused837]": 842, - "[unused838]": 843, - "[unused839]": 844, - "[unused840]": 845, - "[unused841]": 846, - "[unused842]": 847, - "[unused843]": 848, - "[unused844]": 849, - "[unused845]": 850, - "[unused846]": 851, - "[unused847]": 852, - "[unused848]": 853, - "[unused849]": 854, - "[unused850]": 855, - "[unused851]": 856, - "[unused852]": 857, - "[unused853]": 858, - "[unused854]": 859, - "[unused855]": 860, - "[unused856]": 861, - "[unused857]": 862, - "[unused858]": 863, - "[unused859]": 864, - 
"[unused860]": 865, - "[unused861]": 866, - "[unused862]": 867, - "[unused863]": 868, - "[unused864]": 869, - "[unused865]": 870, - "[unused866]": 871, - "[unused867]": 872, - "[unused868]": 873, - "[unused869]": 874, - "[unused870]": 875, - "[unused871]": 876, - "[unused872]": 877, - "[unused873]": 878, - "[unused874]": 879, - "[unused875]": 880, - "[unused876]": 881, - "[unused877]": 882, - "[unused878]": 883, - "[unused879]": 884, - "[unused880]": 885, - "[unused881]": 886, - "[unused882]": 887, - "[unused883]": 888, - "[unused884]": 889, - "[unused885]": 890, - "[unused886]": 891, - "[unused887]": 892, - "[unused888]": 893, - "[unused889]": 894, - "[unused890]": 895, - "[unused891]": 896, - "[unused892]": 897, - "[unused893]": 898, - "[unused894]": 899, - "[unused895]": 900, - "[unused896]": 901, - "[unused897]": 902, - "[unused898]": 903, - "[unused899]": 904, - "[unused900]": 905, - "[unused901]": 906, - "[unused902]": 907, - "[unused903]": 908, - "[unused904]": 909, - "[unused905]": 910, - "[unused906]": 911, - "[unused907]": 912, - "[unused908]": 913, - "[unused909]": 914, - "[unused910]": 915, - "[unused911]": 916, - "[unused912]": 917, - "[unused913]": 918, - "[unused914]": 919, - "[unused915]": 920, - "[unused916]": 921, - "[unused917]": 922, - "[unused918]": 923, - "[unused919]": 924, - "[unused920]": 925, - "[unused921]": 926, - "[unused922]": 927, - "[unused923]": 928, - "[unused924]": 929, - "[unused925]": 930, - "[unused926]": 931, - "[unused927]": 932, - "[unused928]": 933, - "[unused929]": 934, - "[unused930]": 935, - "[unused931]": 936, - "[unused932]": 937, - "[unused933]": 938, - "[unused934]": 939, - "[unused935]": 940, - "[unused936]": 941, - "[unused937]": 942, - "[unused938]": 943, - "[unused939]": 944, - "[unused940]": 945, - "[unused941]": 946, - "[unused942]": 947, - "[unused943]": 948, - "[unused944]": 949, - "[unused945]": 950, - "[unused946]": 951, - "[unused947]": 952, - "[unused948]": 953, - "[unused949]": 954, - "[unused950]": 955, - "[unused951]": 956, - "[unused952]": 957, - "[unused953]": 958, - "[unused954]": 959, - "[unused955]": 960, - "[unused956]": 961, - "[unused957]": 962, - "[unused958]": 963, - "[unused959]": 964, - "[unused960]": 965, - "[unused961]": 966, - "[unused962]": 967, - "[unused963]": 968, - "[unused964]": 969, - "[unused965]": 970, - "[unused966]": 971, - "[unused967]": 972, - "[unused968]": 973, - "[unused969]": 974, - "[unused970]": 975, - "[unused971]": 976, - "[unused972]": 977, - "[unused973]": 978, - "[unused974]": 979, - "[unused975]": 980, - "[unused976]": 981, - "[unused977]": 982, - "[unused978]": 983, - "[unused979]": 984, - "[unused980]": 985, - "[unused981]": 986, - "[unused982]": 987, - "[unused983]": 988, - "[unused984]": 989, - "[unused985]": 990, - "[unused986]": 991, - "[unused987]": 992, - "[unused988]": 993, - "[unused989]": 994, - "[unused990]": 995, - "[unused991]": 996, - "[unused992]": 997, - "[unused993]": 998, - "!": 999, - "\"": 1000, - "#": 1001, - "$": 1002, - "%": 1003, - "&": 1004, - "'": 1005, - "(": 1006, - ")": 1007, - "*": 1008, - "+": 1009, - ",": 1010, - "-": 1011, - ".": 1012, - "/": 1013, - "0": 1014, - "1": 1015, - "2": 1016, - "3": 1017, - "4": 1018, - "5": 1019, - "6": 1020, - "7": 1021, - "8": 1022, - "9": 1023, - ":": 1024, - ";": 1025, - "<": 1026, - "=": 1027, - ">": 1028, - "?": 1029, - "@": 1030, - "[": 1031, - "\\": 1032, - "]": 1033, - "^": 1034, - "_": 1035, - "`": 1036, - "a": 1037, - "b": 1038, - "c": 1039, - "d": 1040, - "e": 1041, - "f": 1042, - "g": 1043, - "h": 1044, - "i": 
1045, - "j": 1046, - "k": 1047, - "l": 1048, - "m": 1049, - "n": 1050, - "o": 1051, - "p": 1052, - "q": 1053, - "r": 1054, - "s": 1055, - "t": 1056, - "u": 1057, - "v": 1058, - "w": 1059, - "x": 1060, - "y": 1061, - "z": 1062, - "{": 1063, - "|": 1064, - "}": 1065, - "~": 1066, - "¡": 1067, - "¢": 1068, - "£": 1069, - "¤": 1070, - "¥": 1071, - "¦": 1072, - "§": 1073, - "¨": 1074, - "©": 1075, - "ª": 1076, - "«": 1077, - "¬": 1078, - "®": 1079, - "°": 1080, - "±": 1081, - "²": 1082, - "³": 1083, - "´": 1084, - "µ": 1085, - "¶": 1086, - "·": 1087, - "¹": 1088, - "º": 1089, - "»": 1090, - "¼": 1091, - "½": 1092, - "¾": 1093, - "¿": 1094, - "×": 1095, - "ß": 1096, - "æ": 1097, - "ð": 1098, - "÷": 1099, - "ø": 1100, - "þ": 1101, - "đ": 1102, - "ħ": 1103, - "ı": 1104, - "ł": 1105, - "ŋ": 1106, - "œ": 1107, - "ƒ": 1108, - "ɐ": 1109, - "ɑ": 1110, - "ɒ": 1111, - "ɔ": 1112, - "ɕ": 1113, - "ə": 1114, - "ɛ": 1115, - "ɡ": 1116, - "ɣ": 1117, - "ɨ": 1118, - "ɪ": 1119, - "ɫ": 1120, - "ɬ": 1121, - "ɯ": 1122, - "ɲ": 1123, - "ɴ": 1124, - "ɹ": 1125, - "ɾ": 1126, - "ʀ": 1127, - "ʁ": 1128, - "ʂ": 1129, - "ʃ": 1130, - "ʉ": 1131, - "ʊ": 1132, - "ʋ": 1133, - "ʌ": 1134, - "ʎ": 1135, - "ʐ": 1136, - "ʑ": 1137, - "ʒ": 1138, - "ʔ": 1139, - "ʰ": 1140, - "ʲ": 1141, - "ʳ": 1142, - "ʷ": 1143, - "ʸ": 1144, - "ʻ": 1145, - "ʼ": 1146, - "ʾ": 1147, - "ʿ": 1148, - "ˈ": 1149, - "ː": 1150, - "ˡ": 1151, - "ˢ": 1152, - "ˣ": 1153, - "ˤ": 1154, - "α": 1155, - "β": 1156, - "γ": 1157, - "δ": 1158, - "ε": 1159, - "ζ": 1160, - "η": 1161, - "θ": 1162, - "ι": 1163, - "κ": 1164, - "λ": 1165, - "μ": 1166, - "ν": 1167, - "ξ": 1168, - "ο": 1169, - "π": 1170, - "ρ": 1171, - "ς": 1172, - "σ": 1173, - "τ": 1174, - "υ": 1175, - "φ": 1176, - "χ": 1177, - "ψ": 1178, - "ω": 1179, - "а": 1180, - "б": 1181, - "в": 1182, - "г": 1183, - "д": 1184, - "е": 1185, - "ж": 1186, - "з": 1187, - "и": 1188, - "к": 1189, - "л": 1190, - "м": 1191, - "н": 1192, - "о": 1193, - "п": 1194, - "р": 1195, - "с": 1196, - "т": 1197, - "у": 1198, - "ф": 1199, - "х": 1200, - "ц": 1201, - "ч": 1202, - "ш": 1203, - "щ": 1204, - "ъ": 1205, - "ы": 1206, - "ь": 1207, - "э": 1208, - "ю": 1209, - "я": 1210, - "ђ": 1211, - "є": 1212, - "і": 1213, - "ј": 1214, - "љ": 1215, - "њ": 1216, - "ћ": 1217, - "ӏ": 1218, - "ա": 1219, - "բ": 1220, - "գ": 1221, - "դ": 1222, - "ե": 1223, - "թ": 1224, - "ի": 1225, - "լ": 1226, - "կ": 1227, - "հ": 1228, - "մ": 1229, - "յ": 1230, - "ն": 1231, - "ո": 1232, - "պ": 1233, - "ս": 1234, - "վ": 1235, - "տ": 1236, - "ր": 1237, - "ւ": 1238, - "ք": 1239, - "־": 1240, - "א": 1241, - "ב": 1242, - "ג": 1243, - "ד": 1244, - "ה": 1245, - "ו": 1246, - "ז": 1247, - "ח": 1248, - "ט": 1249, - "י": 1250, - "ך": 1251, - "כ": 1252, - "ל": 1253, - "ם": 1254, - "מ": 1255, - "ן": 1256, - "נ": 1257, - "ס": 1258, - "ע": 1259, - "ף": 1260, - "פ": 1261, - "ץ": 1262, - "צ": 1263, - "ק": 1264, - "ר": 1265, - "ש": 1266, - "ת": 1267, - "،": 1268, - "ء": 1269, - "ا": 1270, - "ب": 1271, - "ة": 1272, - "ت": 1273, - "ث": 1274, - "ج": 1275, - "ح": 1276, - "خ": 1277, - "د": 1278, - "ذ": 1279, - "ر": 1280, - "ز": 1281, - "س": 1282, - "ش": 1283, - "ص": 1284, - "ض": 1285, - "ط": 1286, - "ظ": 1287, - "ع": 1288, - "غ": 1289, - "ـ": 1290, - "ف": 1291, - "ق": 1292, - "ك": 1293, - "ل": 1294, - "م": 1295, - "ن": 1296, - "ه": 1297, - "و": 1298, - "ى": 1299, - "ي": 1300, - "ٹ": 1301, - "پ": 1302, - "چ": 1303, - "ک": 1304, - "گ": 1305, - "ں": 1306, - "ھ": 1307, - "ہ": 1308, - "ی": 1309, - "ے": 1310, - "अ": 1311, - "आ": 1312, - "उ": 1313, - "ए": 1314, - "क": 1315, - "ख": 1316, - "ग": 1317, - "च": 1318, 
- "ज": 1319, - "ट": 1320, - "ड": 1321, - "ण": 1322, - "त": 1323, - "थ": 1324, - "द": 1325, - "ध": 1326, - "न": 1327, - "प": 1328, - "ब": 1329, - "भ": 1330, - "म": 1331, - "य": 1332, - "र": 1333, - "ल": 1334, - "व": 1335, - "श": 1336, - "ष": 1337, - "स": 1338, - "ह": 1339, - "ा": 1340, - "ि": 1341, - "ी": 1342, - "ो": 1343, - "।": 1344, - "॥": 1345, - "ং": 1346, - "অ": 1347, - "আ": 1348, - "ই": 1349, - "উ": 1350, - "এ": 1351, - "ও": 1352, - "ক": 1353, - "খ": 1354, - "গ": 1355, - "চ": 1356, - "ছ": 1357, - "জ": 1358, - "ট": 1359, - "ড": 1360, - "ণ": 1361, - "ত": 1362, - "থ": 1363, - "দ": 1364, - "ধ": 1365, - "ন": 1366, - "প": 1367, - "ব": 1368, - "ভ": 1369, - "ম": 1370, - "য": 1371, - "র": 1372, - "ল": 1373, - "শ": 1374, - "ষ": 1375, - "স": 1376, - "হ": 1377, - "া": 1378, - "ি": 1379, - "ী": 1380, - "ে": 1381, - "க": 1382, - "ச": 1383, - "ட": 1384, - "த": 1385, - "ந": 1386, - "ன": 1387, - "ப": 1388, - "ம": 1389, - "ய": 1390, - "ர": 1391, - "ல": 1392, - "ள": 1393, - "வ": 1394, - "ா": 1395, - "ி": 1396, - "ு": 1397, - "ே": 1398, - "ை": 1399, - "ನ": 1400, - "ರ": 1401, - "ಾ": 1402, - "ක": 1403, - "ය": 1404, - "ර": 1405, - "ල": 1406, - "ව": 1407, - "ා": 1408, - "ก": 1409, - "ง": 1410, - "ต": 1411, - "ท": 1412, - "น": 1413, - "พ": 1414, - "ม": 1415, - "ย": 1416, - "ร": 1417, - "ล": 1418, - "ว": 1419, - "ส": 1420, - "อ": 1421, - "า": 1422, - "เ": 1423, - "་": 1424, - "།": 1425, - "ག": 1426, - "ང": 1427, - "ད": 1428, - "ན": 1429, - "པ": 1430, - "བ": 1431, - "མ": 1432, - "འ": 1433, - "ར": 1434, - "ལ": 1435, - "ས": 1436, - "မ": 1437, - "ა": 1438, - "ბ": 1439, - "გ": 1440, - "დ": 1441, - "ე": 1442, - "ვ": 1443, - "თ": 1444, - "ი": 1445, - "კ": 1446, - "ლ": 1447, - "მ": 1448, - "ნ": 1449, - "ო": 1450, - "რ": 1451, - "ს": 1452, - "ტ": 1453, - "უ": 1454, - "ᄀ": 1455, - "ᄂ": 1456, - "ᄃ": 1457, - "ᄅ": 1458, - "ᄆ": 1459, - "ᄇ": 1460, - "ᄉ": 1461, - "ᄊ": 1462, - "ᄋ": 1463, - "ᄌ": 1464, - "ᄎ": 1465, - "ᄏ": 1466, - "ᄐ": 1467, - "ᄑ": 1468, - "ᄒ": 1469, - "ᅡ": 1470, - "ᅢ": 1471, - "ᅥ": 1472, - "ᅦ": 1473, - "ᅧ": 1474, - "ᅩ": 1475, - "ᅪ": 1476, - "ᅭ": 1477, - "ᅮ": 1478, - "ᅯ": 1479, - "ᅲ": 1480, - "ᅳ": 1481, - "ᅴ": 1482, - "ᅵ": 1483, - "ᆨ": 1484, - "ᆫ": 1485, - "ᆯ": 1486, - "ᆷ": 1487, - "ᆸ": 1488, - "ᆼ": 1489, - "ᴬ": 1490, - "ᴮ": 1491, - "ᴰ": 1492, - "ᴵ": 1493, - "ᴺ": 1494, - "ᵀ": 1495, - "ᵃ": 1496, - "ᵇ": 1497, - "ᵈ": 1498, - "ᵉ": 1499, - "ᵍ": 1500, - "ᵏ": 1501, - "ᵐ": 1502, - "ᵒ": 1503, - "ᵖ": 1504, - "ᵗ": 1505, - "ᵘ": 1506, - "ᵢ": 1507, - "ᵣ": 1508, - "ᵤ": 1509, - "ᵥ": 1510, - "ᶜ": 1511, - "ᶠ": 1512, - "‐": 1513, - "‑": 1514, - "‒": 1515, - "–": 1516, - "—": 1517, - "―": 1518, - "‖": 1519, - "‘": 1520, - "’": 1521, - "‚": 1522, - "“": 1523, - "”": 1524, - "„": 1525, - "†": 1526, - "‡": 1527, - "•": 1528, - "…": 1529, - "‰": 1530, - "′": 1531, - "″": 1532, - "›": 1533, - "‿": 1534, - "⁄": 1535, - "⁰": 1536, - "ⁱ": 1537, - "⁴": 1538, - "⁵": 1539, - "⁶": 1540, - "⁷": 1541, - "⁸": 1542, - "⁹": 1543, - "⁺": 1544, - "⁻": 1545, - "ⁿ": 1546, - "₀": 1547, - "₁": 1548, - "₂": 1549, - "₃": 1550, - "₄": 1551, - "₅": 1552, - "₆": 1553, - "₇": 1554, - "₈": 1555, - "₉": 1556, - "₊": 1557, - "₍": 1558, - "₎": 1559, - "ₐ": 1560, - "ₑ": 1561, - "ₒ": 1562, - "ₓ": 1563, - "ₕ": 1564, - "ₖ": 1565, - "ₗ": 1566, - "ₘ": 1567, - "ₙ": 1568, - "ₚ": 1569, - "ₛ": 1570, - "ₜ": 1571, - "₤": 1572, - "₩": 1573, - "€": 1574, - "₱": 1575, - "₹": 1576, - "ℓ": 1577, - "№": 1578, - "ℝ": 1579, - "™": 1580, - "⅓": 1581, - "⅔": 1582, - "←": 1583, - "↑": 1584, - "→": 1585, - "↓": 1586, - "↔": 1587, - "↦": 1588, - "⇄": 1589, - "⇌": 1590, - "⇒": 1591, - 
"∂": 1592, - "∅": 1593, - "∆": 1594, - "∇": 1595, - "∈": 1596, - "−": 1597, - "∗": 1598, - "∘": 1599, - "√": 1600, - "∞": 1601, - "∧": 1602, - "∨": 1603, - "∩": 1604, - "∪": 1605, - "≈": 1606, - "≡": 1607, - "≤": 1608, - "≥": 1609, - "⊂": 1610, - "⊆": 1611, - "⊕": 1612, - "⊗": 1613, - "⋅": 1614, - "─": 1615, - "│": 1616, - "■": 1617, - "▪": 1618, - "●": 1619, - "★": 1620, - "☆": 1621, - "☉": 1622, - "♠": 1623, - "♣": 1624, - "♥": 1625, - "♦": 1626, - "♭": 1627, - "♯": 1628, - "⟨": 1629, - "⟩": 1630, - "ⱼ": 1631, - "⺩": 1632, - "⺼": 1633, - "⽥": 1634, - "、": 1635, - "。": 1636, - "〈": 1637, - "〉": 1638, - "《": 1639, - "》": 1640, - "「": 1641, - "」": 1642, - "『": 1643, - "』": 1644, - "〜": 1645, - "あ": 1646, - "い": 1647, - "う": 1648, - "え": 1649, - "お": 1650, - "か": 1651, - "き": 1652, - "く": 1653, - "け": 1654, - "こ": 1655, - "さ": 1656, - "し": 1657, - "す": 1658, - "せ": 1659, - "そ": 1660, - "た": 1661, - "ち": 1662, - "っ": 1663, - "つ": 1664, - "て": 1665, - "と": 1666, - "な": 1667, - "に": 1668, - "ぬ": 1669, - "ね": 1670, - "の": 1671, - "は": 1672, - "ひ": 1673, - "ふ": 1674, - "へ": 1675, - "ほ": 1676, - "ま": 1677, - "み": 1678, - "む": 1679, - "め": 1680, - "も": 1681, - "や": 1682, - "ゆ": 1683, - "よ": 1684, - "ら": 1685, - "り": 1686, - "る": 1687, - "れ": 1688, - "ろ": 1689, - "を": 1690, - "ん": 1691, - "ァ": 1692, - "ア": 1693, - "ィ": 1694, - "イ": 1695, - "ウ": 1696, - "ェ": 1697, - "エ": 1698, - "オ": 1699, - "カ": 1700, - "キ": 1701, - "ク": 1702, - "ケ": 1703, - "コ": 1704, - "サ": 1705, - "シ": 1706, - "ス": 1707, - "セ": 1708, - "タ": 1709, - "チ": 1710, - "ッ": 1711, - "ツ": 1712, - "テ": 1713, - "ト": 1714, - "ナ": 1715, - "ニ": 1716, - "ノ": 1717, - "ハ": 1718, - "ヒ": 1719, - "フ": 1720, - "ヘ": 1721, - "ホ": 1722, - "マ": 1723, - "ミ": 1724, - "ム": 1725, - "メ": 1726, - "モ": 1727, - "ャ": 1728, - "ュ": 1729, - "ョ": 1730, - "ラ": 1731, - "リ": 1732, - "ル": 1733, - "レ": 1734, - "ロ": 1735, - "ワ": 1736, - "ン": 1737, - "・": 1738, - "ー": 1739, - "一": 1740, - "三": 1741, - "上": 1742, - "下": 1743, - "不": 1744, - "世": 1745, - "中": 1746, - "主": 1747, - "久": 1748, - "之": 1749, - "也": 1750, - "事": 1751, - "二": 1752, - "五": 1753, - "井": 1754, - "京": 1755, - "人": 1756, - "亻": 1757, - "仁": 1758, - "介": 1759, - "代": 1760, - "仮": 1761, - "伊": 1762, - "会": 1763, - "佐": 1764, - "侍": 1765, - "保": 1766, - "信": 1767, - "健": 1768, - "元": 1769, - "光": 1770, - "八": 1771, - "公": 1772, - "内": 1773, - "出": 1774, - "分": 1775, - "前": 1776, - "劉": 1777, - "力": 1778, - "加": 1779, - "勝": 1780, - "北": 1781, - "区": 1782, - "十": 1783, - "千": 1784, - "南": 1785, - "博": 1786, - "原": 1787, - "口": 1788, - "古": 1789, - "史": 1790, - "司": 1791, - "合": 1792, - "吉": 1793, - "同": 1794, - "名": 1795, - "和": 1796, - "囗": 1797, - "四": 1798, - "国": 1799, - "國": 1800, - "土": 1801, - "地": 1802, - "坂": 1803, - "城": 1804, - "堂": 1805, - "場": 1806, - "士": 1807, - "夏": 1808, - "外": 1809, - "大": 1810, - "天": 1811, - "太": 1812, - "夫": 1813, - "奈": 1814, - "女": 1815, - "子": 1816, - "学": 1817, - "宀": 1818, - "宇": 1819, - "安": 1820, - "宗": 1821, - "定": 1822, - "宣": 1823, - "宮": 1824, - "家": 1825, - "宿": 1826, - "寺": 1827, - "將": 1828, - "小": 1829, - "尚": 1830, - "山": 1831, - "岡": 1832, - "島": 1833, - "崎": 1834, - "川": 1835, - "州": 1836, - "巿": 1837, - "帝": 1838, - "平": 1839, - "年": 1840, - "幸": 1841, - "广": 1842, - "弘": 1843, - "張": 1844, - "彳": 1845, - "後": 1846, - "御": 1847, - "德": 1848, - "心": 1849, - "忄": 1850, - "志": 1851, - "忠": 1852, - "愛": 1853, - "成": 1854, - "我": 1855, - "戦": 1856, - "戸": 1857, - "手": 1858, - "扌": 1859, - "政": 1860, - "文": 1861, - "新": 1862, - "方": 1863, - "日": 1864, - "明": 
1865, - "星": 1866, - "春": 1867, - "昭": 1868, - "智": 1869, - "曲": 1870, - "書": 1871, - "月": 1872, - "有": 1873, - "朝": 1874, - "木": 1875, - "本": 1876, - "李": 1877, - "村": 1878, - "東": 1879, - "松": 1880, - "林": 1881, - "森": 1882, - "楊": 1883, - "樹": 1884, - "橋": 1885, - "歌": 1886, - "止": 1887, - "正": 1888, - "武": 1889, - "比": 1890, - "氏": 1891, - "民": 1892, - "水": 1893, - "氵": 1894, - "氷": 1895, - "永": 1896, - "江": 1897, - "沢": 1898, - "河": 1899, - "治": 1900, - "法": 1901, - "海": 1902, - "清": 1903, - "漢": 1904, - "瀬": 1905, - "火": 1906, - "版": 1907, - "犬": 1908, - "王": 1909, - "生": 1910, - "田": 1911, - "男": 1912, - "疒": 1913, - "発": 1914, - "白": 1915, - "的": 1916, - "皇": 1917, - "目": 1918, - "相": 1919, - "省": 1920, - "真": 1921, - "石": 1922, - "示": 1923, - "社": 1924, - "神": 1925, - "福": 1926, - "禾": 1927, - "秀": 1928, - "秋": 1929, - "空": 1930, - "立": 1931, - "章": 1932, - "竹": 1933, - "糹": 1934, - "美": 1935, - "義": 1936, - "耳": 1937, - "良": 1938, - "艹": 1939, - "花": 1940, - "英": 1941, - "華": 1942, - "葉": 1943, - "藤": 1944, - "行": 1945, - "街": 1946, - "西": 1947, - "見": 1948, - "訁": 1949, - "語": 1950, - "谷": 1951, - "貝": 1952, - "貴": 1953, - "車": 1954, - "軍": 1955, - "辶": 1956, - "道": 1957, - "郎": 1958, - "郡": 1959, - "部": 1960, - "都": 1961, - "里": 1962, - "野": 1963, - "金": 1964, - "鈴": 1965, - "镇": 1966, - "長": 1967, - "門": 1968, - "間": 1969, - "阝": 1970, - "阿": 1971, - "陳": 1972, - "陽": 1973, - "雄": 1974, - "青": 1975, - "面": 1976, - "風": 1977, - "食": 1978, - "香": 1979, - "馬": 1980, - "高": 1981, - "龍": 1982, - "龸": 1983, - "fi": 1984, - "fl": 1985, - "!": 1986, - "(": 1987, - ")": 1988, - ",": 1989, - "-": 1990, - ".": 1991, - "/": 1992, - ":": 1993, - "?": 1994, - "~": 1995, - "the": 1996, - "of": 1997, - "and": 1998, - "in": 1999, - "to": 2000, - "was": 2001, - "he": 2002, - "is": 2003, - "as": 2004, - "for": 2005, - "on": 2006, - "with": 2007, - "that": 2008, - "it": 2009, - "his": 2010, - "by": 2011, - "at": 2012, - "from": 2013, - "her": 2014, - "##s": 2015, - "she": 2016, - "you": 2017, - "had": 2018, - "an": 2019, - "were": 2020, - "but": 2021, - "be": 2022, - "this": 2023, - "are": 2024, - "not": 2025, - "my": 2026, - "they": 2027, - "one": 2028, - "which": 2029, - "or": 2030, - "have": 2031, - "him": 2032, - "me": 2033, - "first": 2034, - "all": 2035, - "also": 2036, - "their": 2037, - "has": 2038, - "up": 2039, - "who": 2040, - "out": 2041, - "been": 2042, - "when": 2043, - "after": 2044, - "there": 2045, - "into": 2046, - "new": 2047, - "two": 2048, - "its": 2049, - "##a": 2050, - "time": 2051, - "would": 2052, - "no": 2053, - "what": 2054, - "about": 2055, - "said": 2056, - "we": 2057, - "over": 2058, - "then": 2059, - "other": 2060, - "so": 2061, - "more": 2062, - "##e": 2063, - "can": 2064, - "if": 2065, - "like": 2066, - "back": 2067, - "them": 2068, - "only": 2069, - "some": 2070, - "could": 2071, - "##i": 2072, - "where": 2073, - "just": 2074, - "##ing": 2075, - "during": 2076, - "before": 2077, - "##n": 2078, - "do": 2079, - "##o": 2080, - "made": 2081, - "school": 2082, - "through": 2083, - "than": 2084, - "now": 2085, - "years": 2086, - "most": 2087, - "world": 2088, - "may": 2089, - "between": 2090, - "down": 2091, - "well": 2092, - "three": 2093, - "##d": 2094, - "year": 2095, - "while": 2096, - "will": 2097, - "##ed": 2098, - "##r": 2099, - "##y": 2100, - "later": 2101, - "##t": 2102, - "city": 2103, - "under": 2104, - "around": 2105, - "did": 2106, - "such": 2107, - "being": 2108, - "used": 2109, - "state": 2110, - "people": 2111, - "part": 2112, - "know": 2113, - 
"against": 2114, - "your": 2115, - "many": 2116, - "second": 2117, - "university": 2118, - "both": 2119, - "national": 2120, - "##er": 2121, - "these": 2122, - "don": 2123, - "known": 2124, - "off": 2125, - "way": 2126, - "until": 2127, - "re": 2128, - "how": 2129, - "even": 2130, - "get": 2131, - "head": 2132, - "...": 2133, - "didn": 2134, - "##ly": 2135, - "team": 2136, - "american": 2137, - "because": 2138, - "de": 2139, - "##l": 2140, - "born": 2141, - "united": 2142, - "film": 2143, - "since": 2144, - "still": 2145, - "long": 2146, - "work": 2147, - "south": 2148, - "us": 2149, - "became": 2150, - "any": 2151, - "high": 2152, - "again": 2153, - "day": 2154, - "family": 2155, - "see": 2156, - "right": 2157, - "man": 2158, - "eyes": 2159, - "house": 2160, - "season": 2161, - "war": 2162, - "states": 2163, - "including": 2164, - "took": 2165, - "life": 2166, - "north": 2167, - "same": 2168, - "each": 2169, - "called": 2170, - "name": 2171, - "much": 2172, - "place": 2173, - "however": 2174, - "go": 2175, - "four": 2176, - "group": 2177, - "another": 2178, - "found": 2179, - "won": 2180, - "area": 2181, - "here": 2182, - "going": 2183, - "10": 2184, - "away": 2185, - "series": 2186, - "left": 2187, - "home": 2188, - "music": 2189, - "best": 2190, - "make": 2191, - "hand": 2192, - "number": 2193, - "company": 2194, - "several": 2195, - "never": 2196, - "last": 2197, - "john": 2198, - "000": 2199, - "very": 2200, - "album": 2201, - "take": 2202, - "end": 2203, - "good": 2204, - "too": 2205, - "following": 2206, - "released": 2207, - "game": 2208, - "played": 2209, - "little": 2210, - "began": 2211, - "district": 2212, - "##m": 2213, - "old": 2214, - "want": 2215, - "those": 2216, - "side": 2217, - "held": 2218, - "own": 2219, - "early": 2220, - "county": 2221, - "ll": 2222, - "league": 2223, - "use": 2224, - "west": 2225, - "##u": 2226, - "face": 2227, - "think": 2228, - "##es": 2229, - "2010": 2230, - "government": 2231, - "##h": 2232, - "march": 2233, - "came": 2234, - "small": 2235, - "general": 2236, - "town": 2237, - "june": 2238, - "##on": 2239, - "line": 2240, - "based": 2241, - "something": 2242, - "##k": 2243, - "september": 2244, - "thought": 2245, - "looked": 2246, - "along": 2247, - "international": 2248, - "2011": 2249, - "air": 2250, - "july": 2251, - "club": 2252, - "went": 2253, - "january": 2254, - "october": 2255, - "our": 2256, - "august": 2257, - "april": 2258, - "york": 2259, - "12": 2260, - "few": 2261, - "2012": 2262, - "2008": 2263, - "east": 2264, - "show": 2265, - "member": 2266, - "college": 2267, - "2009": 2268, - "father": 2269, - "public": 2270, - "##us": 2271, - "come": 2272, - "men": 2273, - "five": 2274, - "set": 2275, - "station": 2276, - "church": 2277, - "##c": 2278, - "next": 2279, - "former": 2280, - "november": 2281, - "room": 2282, - "party": 2283, - "located": 2284, - "december": 2285, - "2013": 2286, - "age": 2287, - "got": 2288, - "2007": 2289, - "##g": 2290, - "system": 2291, - "let": 2292, - "love": 2293, - "2006": 2294, - "though": 2295, - "every": 2296, - "2014": 2297, - "look": 2298, - "song": 2299, - "water": 2300, - "century": 2301, - "without": 2302, - "body": 2303, - "black": 2304, - "night": 2305, - "within": 2306, - "great": 2307, - "women": 2308, - "single": 2309, - "ve": 2310, - "building": 2311, - "large": 2312, - "population": 2313, - "river": 2314, - "named": 2315, - "band": 2316, - "white": 2317, - "started": 2318, - "##an": 2319, - "once": 2320, - "15": 2321, - "20": 2322, - "should": 2323, - "18": 2324, - "2015": 2325, - 
"service": 2326, - "top": 2327, - "built": 2328, - "british": 2329, - "open": 2330, - "death": 2331, - "king": 2332, - "moved": 2333, - "local": 2334, - "times": 2335, - "children": 2336, - "february": 2337, - "book": 2338, - "why": 2339, - "11": 2340, - "door": 2341, - "need": 2342, - "president": 2343, - "order": 2344, - "final": 2345, - "road": 2346, - "wasn": 2347, - "although": 2348, - "due": 2349, - "major": 2350, - "died": 2351, - "village": 2352, - "third": 2353, - "knew": 2354, - "2016": 2355, - "asked": 2356, - "turned": 2357, - "st": 2358, - "wanted": 2359, - "say": 2360, - "##p": 2361, - "together": 2362, - "received": 2363, - "main": 2364, - "son": 2365, - "served": 2366, - "different": 2367, - "##en": 2368, - "behind": 2369, - "himself": 2370, - "felt": 2371, - "members": 2372, - "power": 2373, - "football": 2374, - "law": 2375, - "voice": 2376, - "play": 2377, - "##in": 2378, - "near": 2379, - "park": 2380, - "history": 2381, - "30": 2382, - "having": 2383, - "2005": 2384, - "16": 2385, - "##man": 2386, - "saw": 2387, - "mother": 2388, - "##al": 2389, - "army": 2390, - "point": 2391, - "front": 2392, - "help": 2393, - "english": 2394, - "street": 2395, - "art": 2396, - "late": 2397, - "hands": 2398, - "games": 2399, - "award": 2400, - "##ia": 2401, - "young": 2402, - "14": 2403, - "put": 2404, - "published": 2405, - "country": 2406, - "division": 2407, - "across": 2408, - "told": 2409, - "13": 2410, - "often": 2411, - "ever": 2412, - "french": 2413, - "london": 2414, - "center": 2415, - "six": 2416, - "red": 2417, - "2017": 2418, - "led": 2419, - "days": 2420, - "include": 2421, - "light": 2422, - "25": 2423, - "find": 2424, - "tell": 2425, - "among": 2426, - "species": 2427, - "really": 2428, - "according": 2429, - "central": 2430, - "half": 2431, - "2004": 2432, - "form": 2433, - "original": 2434, - "gave": 2435, - "office": 2436, - "making": 2437, - "enough": 2438, - "lost": 2439, - "full": 2440, - "opened": 2441, - "must": 2442, - "included": 2443, - "live": 2444, - "given": 2445, - "german": 2446, - "player": 2447, - "run": 2448, - "business": 2449, - "woman": 2450, - "community": 2451, - "cup": 2452, - "might": 2453, - "million": 2454, - "land": 2455, - "2000": 2456, - "court": 2457, - "development": 2458, - "17": 2459, - "short": 2460, - "round": 2461, - "ii": 2462, - "km": 2463, - "seen": 2464, - "class": 2465, - "story": 2466, - "always": 2467, - "become": 2468, - "sure": 2469, - "research": 2470, - "almost": 2471, - "director": 2472, - "council": 2473, - "la": 2474, - "##2": 2475, - "career": 2476, - "things": 2477, - "using": 2478, - "island": 2479, - "##z": 2480, - "couldn": 2481, - "car": 2482, - "##is": 2483, - "24": 2484, - "close": 2485, - "force": 2486, - "##1": 2487, - "better": 2488, - "free": 2489, - "support": 2490, - "control": 2491, - "field": 2492, - "students": 2493, - "2003": 2494, - "education": 2495, - "married": 2496, - "##b": 2497, - "nothing": 2498, - "worked": 2499, - "others": 2500, - "record": 2501, - "big": 2502, - "inside": 2503, - "level": 2504, - "anything": 2505, - "continued": 2506, - "give": 2507, - "james": 2508, - "##3": 2509, - "military": 2510, - "established": 2511, - "non": 2512, - "returned": 2513, - "feel": 2514, - "does": 2515, - "title": 2516, - "written": 2517, - "thing": 2518, - "feet": 2519, - "william": 2520, - "far": 2521, - "co": 2522, - "association": 2523, - "hard": 2524, - "already": 2525, - "2002": 2526, - "##ra": 2527, - "championship": 2528, - "human": 2529, - "western": 2530, - "100": 2531, - "##na": 2532, - 
"department": 2533, - "hall": 2534, - "role": 2535, - "various": 2536, - "production": 2537, - "21": 2538, - "19": 2539, - "heart": 2540, - "2001": 2541, - "living": 2542, - "fire": 2543, - "version": 2544, - "##ers": 2545, - "##f": 2546, - "television": 2547, - "royal": 2548, - "##4": 2549, - "produced": 2550, - "working": 2551, - "act": 2552, - "case": 2553, - "society": 2554, - "region": 2555, - "present": 2556, - "radio": 2557, - "period": 2558, - "looking": 2559, - "least": 2560, - "total": 2561, - "keep": 2562, - "england": 2563, - "wife": 2564, - "program": 2565, - "per": 2566, - "brother": 2567, - "mind": 2568, - "special": 2569, - "22": 2570, - "##le": 2571, - "am": 2572, - "works": 2573, - "soon": 2574, - "##6": 2575, - "political": 2576, - "george": 2577, - "services": 2578, - "taken": 2579, - "created": 2580, - "##7": 2581, - "further": 2582, - "able": 2583, - "reached": 2584, - "david": 2585, - "union": 2586, - "joined": 2587, - "upon": 2588, - "done": 2589, - "important": 2590, - "social": 2591, - "information": 2592, - "either": 2593, - "##ic": 2594, - "##x": 2595, - "appeared": 2596, - "position": 2597, - "ground": 2598, - "lead": 2599, - "rock": 2600, - "dark": 2601, - "election": 2602, - "23": 2603, - "board": 2604, - "france": 2605, - "hair": 2606, - "course": 2607, - "arms": 2608, - "site": 2609, - "police": 2610, - "girl": 2611, - "instead": 2612, - "real": 2613, - "sound": 2614, - "##v": 2615, - "words": 2616, - "moment": 2617, - "##te": 2618, - "someone": 2619, - "##8": 2620, - "summer": 2621, - "project": 2622, - "announced": 2623, - "san": 2624, - "less": 2625, - "wrote": 2626, - "past": 2627, - "followed": 2628, - "##5": 2629, - "blue": 2630, - "founded": 2631, - "al": 2632, - "finally": 2633, - "india": 2634, - "taking": 2635, - "records": 2636, - "america": 2637, - "##ne": 2638, - "1999": 2639, - "design": 2640, - "considered": 2641, - "northern": 2642, - "god": 2643, - "stop": 2644, - "battle": 2645, - "toward": 2646, - "european": 2647, - "outside": 2648, - "described": 2649, - "track": 2650, - "today": 2651, - "playing": 2652, - "language": 2653, - "28": 2654, - "call": 2655, - "26": 2656, - "heard": 2657, - "professional": 2658, - "low": 2659, - "australia": 2660, - "miles": 2661, - "california": 2662, - "win": 2663, - "yet": 2664, - "green": 2665, - "##ie": 2666, - "trying": 2667, - "blood": 2668, - "##ton": 2669, - "southern": 2670, - "science": 2671, - "maybe": 2672, - "everything": 2673, - "match": 2674, - "square": 2675, - "27": 2676, - "mouth": 2677, - "video": 2678, - "race": 2679, - "recorded": 2680, - "leave": 2681, - "above": 2682, - "##9": 2683, - "daughter": 2684, - "points": 2685, - "space": 2686, - "1998": 2687, - "museum": 2688, - "change": 2689, - "middle": 2690, - "common": 2691, - "##0": 2692, - "move": 2693, - "tv": 2694, - "post": 2695, - "##ta": 2696, - "lake": 2697, - "seven": 2698, - "tried": 2699, - "elected": 2700, - "closed": 2701, - "ten": 2702, - "paul": 2703, - "minister": 2704, - "##th": 2705, - "months": 2706, - "start": 2707, - "chief": 2708, - "return": 2709, - "canada": 2710, - "person": 2711, - "sea": 2712, - "release": 2713, - "similar": 2714, - "modern": 2715, - "brought": 2716, - "rest": 2717, - "hit": 2718, - "formed": 2719, - "mr": 2720, - "##la": 2721, - "1997": 2722, - "floor": 2723, - "event": 2724, - "doing": 2725, - "thomas": 2726, - "1996": 2727, - "robert": 2728, - "care": 2729, - "killed": 2730, - "training": 2731, - "star": 2732, - "week": 2733, - "needed": 2734, - "turn": 2735, - "finished": 2736, - 
"railway": 2737, - "rather": 2738, - "news": 2739, - "health": 2740, - "sent": 2741, - "example": 2742, - "ran": 2743, - "term": 2744, - "michael": 2745, - "coming": 2746, - "currently": 2747, - "yes": 2748, - "forces": 2749, - "despite": 2750, - "gold": 2751, - "areas": 2752, - "50": 2753, - "stage": 2754, - "fact": 2755, - "29": 2756, - "dead": 2757, - "says": 2758, - "popular": 2759, - "2018": 2760, - "originally": 2761, - "germany": 2762, - "probably": 2763, - "developed": 2764, - "result": 2765, - "pulled": 2766, - "friend": 2767, - "stood": 2768, - "money": 2769, - "running": 2770, - "mi": 2771, - "signed": 2772, - "word": 2773, - "songs": 2774, - "child": 2775, - "eventually": 2776, - "met": 2777, - "tour": 2778, - "average": 2779, - "teams": 2780, - "minutes": 2781, - "festival": 2782, - "current": 2783, - "deep": 2784, - "kind": 2785, - "1995": 2786, - "decided": 2787, - "usually": 2788, - "eastern": 2789, - "seemed": 2790, - "##ness": 2791, - "episode": 2792, - "bed": 2793, - "added": 2794, - "table": 2795, - "indian": 2796, - "private": 2797, - "charles": 2798, - "route": 2799, - "available": 2800, - "idea": 2801, - "throughout": 2802, - "centre": 2803, - "addition": 2804, - "appointed": 2805, - "style": 2806, - "1994": 2807, - "books": 2808, - "eight": 2809, - "construction": 2810, - "press": 2811, - "mean": 2812, - "wall": 2813, - "friends": 2814, - "remained": 2815, - "schools": 2816, - "study": 2817, - "##ch": 2818, - "##um": 2819, - "institute": 2820, - "oh": 2821, - "chinese": 2822, - "sometimes": 2823, - "events": 2824, - "possible": 2825, - "1992": 2826, - "australian": 2827, - "type": 2828, - "brown": 2829, - "forward": 2830, - "talk": 2831, - "process": 2832, - "food": 2833, - "debut": 2834, - "seat": 2835, - "performance": 2836, - "committee": 2837, - "features": 2838, - "character": 2839, - "arts": 2840, - "herself": 2841, - "else": 2842, - "lot": 2843, - "strong": 2844, - "russian": 2845, - "range": 2846, - "hours": 2847, - "peter": 2848, - "arm": 2849, - "##da": 2850, - "morning": 2851, - "dr": 2852, - "sold": 2853, - "##ry": 2854, - "quickly": 2855, - "directed": 2856, - "1993": 2857, - "guitar": 2858, - "china": 2859, - "##w": 2860, - "31": 2861, - "list": 2862, - "##ma": 2863, - "performed": 2864, - "media": 2865, - "uk": 2866, - "players": 2867, - "smile": 2868, - "##rs": 2869, - "myself": 2870, - "40": 2871, - "placed": 2872, - "coach": 2873, - "province": 2874, - "towards": 2875, - "wouldn": 2876, - "leading": 2877, - "whole": 2878, - "boy": 2879, - "official": 2880, - "designed": 2881, - "grand": 2882, - "census": 2883, - "##el": 2884, - "europe": 2885, - "attack": 2886, - "japanese": 2887, - "henry": 2888, - "1991": 2889, - "##re": 2890, - "##os": 2891, - "cross": 2892, - "getting": 2893, - "alone": 2894, - "action": 2895, - "lower": 2896, - "network": 2897, - "wide": 2898, - "washington": 2899, - "japan": 2900, - "1990": 2901, - "hospital": 2902, - "believe": 2903, - "changed": 2904, - "sister": 2905, - "##ar": 2906, - "hold": 2907, - "gone": 2908, - "sir": 2909, - "hadn": 2910, - "ship": 2911, - "##ka": 2912, - "studies": 2913, - "academy": 2914, - "shot": 2915, - "rights": 2916, - "below": 2917, - "base": 2918, - "bad": 2919, - "involved": 2920, - "kept": 2921, - "largest": 2922, - "##ist": 2923, - "bank": 2924, - "future": 2925, - "especially": 2926, - "beginning": 2927, - "mark": 2928, - "movement": 2929, - "section": 2930, - "female": 2931, - "magazine": 2932, - "plan": 2933, - "professor": 2934, - "lord": 2935, - "longer": 2936, - "##ian": 2937, - 
"sat": 2938, - "walked": 2939, - "hill": 2940, - "actually": 2941, - "civil": 2942, - "energy": 2943, - "model": 2944, - "families": 2945, - "size": 2946, - "thus": 2947, - "aircraft": 2948, - "completed": 2949, - "includes": 2950, - "data": 2951, - "captain": 2952, - "##or": 2953, - "fight": 2954, - "vocals": 2955, - "featured": 2956, - "richard": 2957, - "bridge": 2958, - "fourth": 2959, - "1989": 2960, - "officer": 2961, - "stone": 2962, - "hear": 2963, - "##ism": 2964, - "means": 2965, - "medical": 2966, - "groups": 2967, - "management": 2968, - "self": 2969, - "lips": 2970, - "competition": 2971, - "entire": 2972, - "lived": 2973, - "technology": 2974, - "leaving": 2975, - "federal": 2976, - "tournament": 2977, - "bit": 2978, - "passed": 2979, - "hot": 2980, - "independent": 2981, - "awards": 2982, - "kingdom": 2983, - "mary": 2984, - "spent": 2985, - "fine": 2986, - "doesn": 2987, - "reported": 2988, - "##ling": 2989, - "jack": 2990, - "fall": 2991, - "raised": 2992, - "itself": 2993, - "stay": 2994, - "true": 2995, - "studio": 2996, - "1988": 2997, - "sports": 2998, - "replaced": 2999, - "paris": 3000, - "systems": 3001, - "saint": 3002, - "leader": 3003, - "theatre": 3004, - "whose": 3005, - "market": 3006, - "capital": 3007, - "parents": 3008, - "spanish": 3009, - "canadian": 3010, - "earth": 3011, - "##ity": 3012, - "cut": 3013, - "degree": 3014, - "writing": 3015, - "bay": 3016, - "christian": 3017, - "awarded": 3018, - "natural": 3019, - "higher": 3020, - "bill": 3021, - "##as": 3022, - "coast": 3023, - "provided": 3024, - "previous": 3025, - "senior": 3026, - "ft": 3027, - "valley": 3028, - "organization": 3029, - "stopped": 3030, - "onto": 3031, - "countries": 3032, - "parts": 3033, - "conference": 3034, - "queen": 3035, - "security": 3036, - "interest": 3037, - "saying": 3038, - "allowed": 3039, - "master": 3040, - "earlier": 3041, - "phone": 3042, - "matter": 3043, - "smith": 3044, - "winning": 3045, - "try": 3046, - "happened": 3047, - "moving": 3048, - "campaign": 3049, - "los": 3050, - "##ley": 3051, - "breath": 3052, - "nearly": 3053, - "mid": 3054, - "1987": 3055, - "certain": 3056, - "girls": 3057, - "date": 3058, - "italian": 3059, - "african": 3060, - "standing": 3061, - "fell": 3062, - "artist": 3063, - "##ted": 3064, - "shows": 3065, - "deal": 3066, - "mine": 3067, - "industry": 3068, - "1986": 3069, - "##ng": 3070, - "everyone": 3071, - "republic": 3072, - "provide": 3073, - "collection": 3074, - "library": 3075, - "student": 3076, - "##ville": 3077, - "primary": 3078, - "owned": 3079, - "older": 3080, - "via": 3081, - "heavy": 3082, - "1st": 3083, - "makes": 3084, - "##able": 3085, - "attention": 3086, - "anyone": 3087, - "africa": 3088, - "##ri": 3089, - "stated": 3090, - "length": 3091, - "ended": 3092, - "fingers": 3093, - "command": 3094, - "staff": 3095, - "skin": 3096, - "foreign": 3097, - "opening": 3098, - "governor": 3099, - "okay": 3100, - "medal": 3101, - "kill": 3102, - "sun": 3103, - "cover": 3104, - "job": 3105, - "1985": 3106, - "introduced": 3107, - "chest": 3108, - "hell": 3109, - "feeling": 3110, - "##ies": 3111, - "success": 3112, - "meet": 3113, - "reason": 3114, - "standard": 3115, - "meeting": 3116, - "novel": 3117, - "1984": 3118, - "trade": 3119, - "source": 3120, - "buildings": 3121, - "##land": 3122, - "rose": 3123, - "guy": 3124, - "goal": 3125, - "##ur": 3126, - "chapter": 3127, - "native": 3128, - "husband": 3129, - "previously": 3130, - "unit": 3131, - "limited": 3132, - "entered": 3133, - "weeks": 3134, - "producer": 3135, - 
"operations": 3136, - "mountain": 3137, - "takes": 3138, - "covered": 3139, - "forced": 3140, - "related": 3141, - "roman": 3142, - "complete": 3143, - "successful": 3144, - "key": 3145, - "texas": 3146, - "cold": 3147, - "##ya": 3148, - "channel": 3149, - "1980": 3150, - "traditional": 3151, - "films": 3152, - "dance": 3153, - "clear": 3154, - "approximately": 3155, - "500": 3156, - "nine": 3157, - "van": 3158, - "prince": 3159, - "question": 3160, - "active": 3161, - "tracks": 3162, - "ireland": 3163, - "regional": 3164, - "silver": 3165, - "author": 3166, - "personal": 3167, - "sense": 3168, - "operation": 3169, - "##ine": 3170, - "economic": 3171, - "1983": 3172, - "holding": 3173, - "twenty": 3174, - "isbn": 3175, - "additional": 3176, - "speed": 3177, - "hour": 3178, - "edition": 3179, - "regular": 3180, - "historic": 3181, - "places": 3182, - "whom": 3183, - "shook": 3184, - "movie": 3185, - "km²": 3186, - "secretary": 3187, - "prior": 3188, - "report": 3189, - "chicago": 3190, - "read": 3191, - "foundation": 3192, - "view": 3193, - "engine": 3194, - "scored": 3195, - "1982": 3196, - "units": 3197, - "ask": 3198, - "airport": 3199, - "property": 3200, - "ready": 3201, - "immediately": 3202, - "lady": 3203, - "month": 3204, - "listed": 3205, - "contract": 3206, - "##de": 3207, - "manager": 3208, - "themselves": 3209, - "lines": 3210, - "##ki": 3211, - "navy": 3212, - "writer": 3213, - "meant": 3214, - "##ts": 3215, - "runs": 3216, - "##ro": 3217, - "practice": 3218, - "championships": 3219, - "singer": 3220, - "glass": 3221, - "commission": 3222, - "required": 3223, - "forest": 3224, - "starting": 3225, - "culture": 3226, - "generally": 3227, - "giving": 3228, - "access": 3229, - "attended": 3230, - "test": 3231, - "couple": 3232, - "stand": 3233, - "catholic": 3234, - "martin": 3235, - "caught": 3236, - "executive": 3237, - "##less": 3238, - "eye": 3239, - "##ey": 3240, - "thinking": 3241, - "chair": 3242, - "quite": 3243, - "shoulder": 3244, - "1979": 3245, - "hope": 3246, - "decision": 3247, - "plays": 3248, - "defeated": 3249, - "municipality": 3250, - "whether": 3251, - "structure": 3252, - "offered": 3253, - "slowly": 3254, - "pain": 3255, - "ice": 3256, - "direction": 3257, - "##ion": 3258, - "paper": 3259, - "mission": 3260, - "1981": 3261, - "mostly": 3262, - "200": 3263, - "noted": 3264, - "individual": 3265, - "managed": 3266, - "nature": 3267, - "lives": 3268, - "plant": 3269, - "##ha": 3270, - "helped": 3271, - "except": 3272, - "studied": 3273, - "computer": 3274, - "figure": 3275, - "relationship": 3276, - "issue": 3277, - "significant": 3278, - "loss": 3279, - "die": 3280, - "smiled": 3281, - "gun": 3282, - "ago": 3283, - "highest": 3284, - "1972": 3285, - "##am": 3286, - "male": 3287, - "bring": 3288, - "goals": 3289, - "mexico": 3290, - "problem": 3291, - "distance": 3292, - "commercial": 3293, - "completely": 3294, - "location": 3295, - "annual": 3296, - "famous": 3297, - "drive": 3298, - "1976": 3299, - "neck": 3300, - "1978": 3301, - "surface": 3302, - "caused": 3303, - "italy": 3304, - "understand": 3305, - "greek": 3306, - "highway": 3307, - "wrong": 3308, - "hotel": 3309, - "comes": 3310, - "appearance": 3311, - "joseph": 3312, - "double": 3313, - "issues": 3314, - "musical": 3315, - "companies": 3316, - "castle": 3317, - "income": 3318, - "review": 3319, - "assembly": 3320, - "bass": 3321, - "initially": 3322, - "parliament": 3323, - "artists": 3324, - "experience": 3325, - "1974": 3326, - "particular": 3327, - "walk": 3328, - "foot": 3329, - "engineering": 
3330, - "talking": 3331, - "window": 3332, - "dropped": 3333, - "##ter": 3334, - "miss": 3335, - "baby": 3336, - "boys": 3337, - "break": 3338, - "1975": 3339, - "stars": 3340, - "edge": 3341, - "remember": 3342, - "policy": 3343, - "carried": 3344, - "train": 3345, - "stadium": 3346, - "bar": 3347, - "sex": 3348, - "angeles": 3349, - "evidence": 3350, - "##ge": 3351, - "becoming": 3352, - "assistant": 3353, - "soviet": 3354, - "1977": 3355, - "upper": 3356, - "step": 3357, - "wing": 3358, - "1970": 3359, - "youth": 3360, - "financial": 3361, - "reach": 3362, - "##ll": 3363, - "actor": 3364, - "numerous": 3365, - "##se": 3366, - "##st": 3367, - "nodded": 3368, - "arrived": 3369, - "##ation": 3370, - "minute": 3371, - "##nt": 3372, - "believed": 3373, - "sorry": 3374, - "complex": 3375, - "beautiful": 3376, - "victory": 3377, - "associated": 3378, - "temple": 3379, - "1968": 3380, - "1973": 3381, - "chance": 3382, - "perhaps": 3383, - "metal": 3384, - "##son": 3385, - "1945": 3386, - "bishop": 3387, - "##et": 3388, - "lee": 3389, - "launched": 3390, - "particularly": 3391, - "tree": 3392, - "le": 3393, - "retired": 3394, - "subject": 3395, - "prize": 3396, - "contains": 3397, - "yeah": 3398, - "theory": 3399, - "empire": 3400, - "##ce": 3401, - "suddenly": 3402, - "waiting": 3403, - "trust": 3404, - "recording": 3405, - "##to": 3406, - "happy": 3407, - "terms": 3408, - "camp": 3409, - "champion": 3410, - "1971": 3411, - "religious": 3412, - "pass": 3413, - "zealand": 3414, - "names": 3415, - "2nd": 3416, - "port": 3417, - "ancient": 3418, - "tom": 3419, - "corner": 3420, - "represented": 3421, - "watch": 3422, - "legal": 3423, - "anti": 3424, - "justice": 3425, - "cause": 3426, - "watched": 3427, - "brothers": 3428, - "45": 3429, - "material": 3430, - "changes": 3431, - "simply": 3432, - "response": 3433, - "louis": 3434, - "fast": 3435, - "##ting": 3436, - "answer": 3437, - "60": 3438, - "historical": 3439, - "1969": 3440, - "stories": 3441, - "straight": 3442, - "create": 3443, - "feature": 3444, - "increased": 3445, - "rate": 3446, - "administration": 3447, - "virginia": 3448, - "el": 3449, - "activities": 3450, - "cultural": 3451, - "overall": 3452, - "winner": 3453, - "programs": 3454, - "basketball": 3455, - "legs": 3456, - "guard": 3457, - "beyond": 3458, - "cast": 3459, - "doctor": 3460, - "mm": 3461, - "flight": 3462, - "results": 3463, - "remains": 3464, - "cost": 3465, - "effect": 3466, - "winter": 3467, - "##ble": 3468, - "larger": 3469, - "islands": 3470, - "problems": 3471, - "chairman": 3472, - "grew": 3473, - "commander": 3474, - "isn": 3475, - "1967": 3476, - "pay": 3477, - "failed": 3478, - "selected": 3479, - "hurt": 3480, - "fort": 3481, - "box": 3482, - "regiment": 3483, - "majority": 3484, - "journal": 3485, - "35": 3486, - "edward": 3487, - "plans": 3488, - "##ke": 3489, - "##ni": 3490, - "shown": 3491, - "pretty": 3492, - "irish": 3493, - "characters": 3494, - "directly": 3495, - "scene": 3496, - "likely": 3497, - "operated": 3498, - "allow": 3499, - "spring": 3500, - "##j": 3501, - "junior": 3502, - "matches": 3503, - "looks": 3504, - "mike": 3505, - "houses": 3506, - "fellow": 3507, - "##tion": 3508, - "beach": 3509, - "marriage": 3510, - "##ham": 3511, - "##ive": 3512, - "rules": 3513, - "oil": 3514, - "65": 3515, - "florida": 3516, - "expected": 3517, - "nearby": 3518, - "congress": 3519, - "sam": 3520, - "peace": 3521, - "recent": 3522, - "iii": 3523, - "wait": 3524, - "subsequently": 3525, - "cell": 3526, - "##do": 3527, - "variety": 3528, - "serving": 3529, - 
"agreed": 3530, - "please": 3531, - "poor": 3532, - "joe": 3533, - "pacific": 3534, - "attempt": 3535, - "wood": 3536, - "democratic": 3537, - "piece": 3538, - "prime": 3539, - "##ca": 3540, - "rural": 3541, - "mile": 3542, - "touch": 3543, - "appears": 3544, - "township": 3545, - "1964": 3546, - "1966": 3547, - "soldiers": 3548, - "##men": 3549, - "##ized": 3550, - "1965": 3551, - "pennsylvania": 3552, - "closer": 3553, - "fighting": 3554, - "claimed": 3555, - "score": 3556, - "jones": 3557, - "physical": 3558, - "editor": 3559, - "##ous": 3560, - "filled": 3561, - "genus": 3562, - "specific": 3563, - "sitting": 3564, - "super": 3565, - "mom": 3566, - "##va": 3567, - "therefore": 3568, - "supported": 3569, - "status": 3570, - "fear": 3571, - "cases": 3572, - "store": 3573, - "meaning": 3574, - "wales": 3575, - "minor": 3576, - "spain": 3577, - "tower": 3578, - "focus": 3579, - "vice": 3580, - "frank": 3581, - "follow": 3582, - "parish": 3583, - "separate": 3584, - "golden": 3585, - "horse": 3586, - "fifth": 3587, - "remaining": 3588, - "branch": 3589, - "32": 3590, - "presented": 3591, - "stared": 3592, - "##id": 3593, - "uses": 3594, - "secret": 3595, - "forms": 3596, - "##co": 3597, - "baseball": 3598, - "exactly": 3599, - "##ck": 3600, - "choice": 3601, - "note": 3602, - "discovered": 3603, - "travel": 3604, - "composed": 3605, - "truth": 3606, - "russia": 3607, - "ball": 3608, - "color": 3609, - "kiss": 3610, - "dad": 3611, - "wind": 3612, - "continue": 3613, - "ring": 3614, - "referred": 3615, - "numbers": 3616, - "digital": 3617, - "greater": 3618, - "##ns": 3619, - "metres": 3620, - "slightly": 3621, - "direct": 3622, - "increase": 3623, - "1960": 3624, - "responsible": 3625, - "crew": 3626, - "rule": 3627, - "trees": 3628, - "troops": 3629, - "##no": 3630, - "broke": 3631, - "goes": 3632, - "individuals": 3633, - "hundred": 3634, - "weight": 3635, - "creek": 3636, - "sleep": 3637, - "memory": 3638, - "defense": 3639, - "provides": 3640, - "ordered": 3641, - "code": 3642, - "value": 3643, - "jewish": 3644, - "windows": 3645, - "1944": 3646, - "safe": 3647, - "judge": 3648, - "whatever": 3649, - "corps": 3650, - "realized": 3651, - "growing": 3652, - "pre": 3653, - "##ga": 3654, - "cities": 3655, - "alexander": 3656, - "gaze": 3657, - "lies": 3658, - "spread": 3659, - "scott": 3660, - "letter": 3661, - "showed": 3662, - "situation": 3663, - "mayor": 3664, - "transport": 3665, - "watching": 3666, - "workers": 3667, - "extended": 3668, - "##li": 3669, - "expression": 3670, - "normal": 3671, - "##ment": 3672, - "chart": 3673, - "multiple": 3674, - "border": 3675, - "##ba": 3676, - "host": 3677, - "##ner": 3678, - "daily": 3679, - "mrs": 3680, - "walls": 3681, - "piano": 3682, - "##ko": 3683, - "heat": 3684, - "cannot": 3685, - "##ate": 3686, - "earned": 3687, - "products": 3688, - "drama": 3689, - "era": 3690, - "authority": 3691, - "seasons": 3692, - "join": 3693, - "grade": 3694, - "##io": 3695, - "sign": 3696, - "difficult": 3697, - "machine": 3698, - "1963": 3699, - "territory": 3700, - "mainly": 3701, - "##wood": 3702, - "stations": 3703, - "squadron": 3704, - "1962": 3705, - "stepped": 3706, - "iron": 3707, - "19th": 3708, - "##led": 3709, - "serve": 3710, - "appear": 3711, - "sky": 3712, - "speak": 3713, - "broken": 3714, - "charge": 3715, - "knowledge": 3716, - "kilometres": 3717, - "removed": 3718, - "ships": 3719, - "article": 3720, - "campus": 3721, - "simple": 3722, - "##ty": 3723, - "pushed": 3724, - "britain": 3725, - "##ve": 3726, - "leaves": 3727, - "recently": 3728, - 
"cd": 3729, - "soft": 3730, - "boston": 3731, - "latter": 3732, - "easy": 3733, - "acquired": 3734, - "poland": 3735, - "##sa": 3736, - "quality": 3737, - "officers": 3738, - "presence": 3739, - "planned": 3740, - "nations": 3741, - "mass": 3742, - "broadcast": 3743, - "jean": 3744, - "share": 3745, - "image": 3746, - "influence": 3747, - "wild": 3748, - "offer": 3749, - "emperor": 3750, - "electric": 3751, - "reading": 3752, - "headed": 3753, - "ability": 3754, - "promoted": 3755, - "yellow": 3756, - "ministry": 3757, - "1942": 3758, - "throat": 3759, - "smaller": 3760, - "politician": 3761, - "##by": 3762, - "latin": 3763, - "spoke": 3764, - "cars": 3765, - "williams": 3766, - "males": 3767, - "lack": 3768, - "pop": 3769, - "80": 3770, - "##ier": 3771, - "acting": 3772, - "seeing": 3773, - "consists": 3774, - "##ti": 3775, - "estate": 3776, - "1961": 3777, - "pressure": 3778, - "johnson": 3779, - "newspaper": 3780, - "jr": 3781, - "chris": 3782, - "olympics": 3783, - "online": 3784, - "conditions": 3785, - "beat": 3786, - "elements": 3787, - "walking": 3788, - "vote": 3789, - "##field": 3790, - "needs": 3791, - "carolina": 3792, - "text": 3793, - "featuring": 3794, - "global": 3795, - "block": 3796, - "shirt": 3797, - "levels": 3798, - "francisco": 3799, - "purpose": 3800, - "females": 3801, - "et": 3802, - "dutch": 3803, - "duke": 3804, - "ahead": 3805, - "gas": 3806, - "twice": 3807, - "safety": 3808, - "serious": 3809, - "turning": 3810, - "highly": 3811, - "lieutenant": 3812, - "firm": 3813, - "maria": 3814, - "amount": 3815, - "mixed": 3816, - "daniel": 3817, - "proposed": 3818, - "perfect": 3819, - "agreement": 3820, - "affairs": 3821, - "3rd": 3822, - "seconds": 3823, - "contemporary": 3824, - "paid": 3825, - "1943": 3826, - "prison": 3827, - "save": 3828, - "kitchen": 3829, - "label": 3830, - "administrative": 3831, - "intended": 3832, - "constructed": 3833, - "academic": 3834, - "nice": 3835, - "teacher": 3836, - "races": 3837, - "1956": 3838, - "formerly": 3839, - "corporation": 3840, - "ben": 3841, - "nation": 3842, - "issued": 3843, - "shut": 3844, - "1958": 3845, - "drums": 3846, - "housing": 3847, - "victoria": 3848, - "seems": 3849, - "opera": 3850, - "1959": 3851, - "graduated": 3852, - "function": 3853, - "von": 3854, - "mentioned": 3855, - "picked": 3856, - "build": 3857, - "recognized": 3858, - "shortly": 3859, - "protection": 3860, - "picture": 3861, - "notable": 3862, - "exchange": 3863, - "elections": 3864, - "1980s": 3865, - "loved": 3866, - "percent": 3867, - "racing": 3868, - "fish": 3869, - "elizabeth": 3870, - "garden": 3871, - "volume": 3872, - "hockey": 3873, - "1941": 3874, - "beside": 3875, - "settled": 3876, - "##ford": 3877, - "1940": 3878, - "competed": 3879, - "replied": 3880, - "drew": 3881, - "1948": 3882, - "actress": 3883, - "marine": 3884, - "scotland": 3885, - "steel": 3886, - "glanced": 3887, - "farm": 3888, - "steve": 3889, - "1957": 3890, - "risk": 3891, - "tonight": 3892, - "positive": 3893, - "magic": 3894, - "singles": 3895, - "effects": 3896, - "gray": 3897, - "screen": 3898, - "dog": 3899, - "##ja": 3900, - "residents": 3901, - "bus": 3902, - "sides": 3903, - "none": 3904, - "secondary": 3905, - "literature": 3906, - "polish": 3907, - "destroyed": 3908, - "flying": 3909, - "founder": 3910, - "households": 3911, - "1939": 3912, - "lay": 3913, - "reserve": 3914, - "usa": 3915, - "gallery": 3916, - "##ler": 3917, - "1946": 3918, - "industrial": 3919, - "younger": 3920, - "approach": 3921, - "appearances": 3922, - "urban": 3923, - "ones": 
3924, - "1950": 3925, - "finish": 3926, - "avenue": 3927, - "powerful": 3928, - "fully": 3929, - "growth": 3930, - "page": 3931, - "honor": 3932, - "jersey": 3933, - "projects": 3934, - "advanced": 3935, - "revealed": 3936, - "basic": 3937, - "90": 3938, - "infantry": 3939, - "pair": 3940, - "equipment": 3941, - "visit": 3942, - "33": 3943, - "evening": 3944, - "search": 3945, - "grant": 3946, - "effort": 3947, - "solo": 3948, - "treatment": 3949, - "buried": 3950, - "republican": 3951, - "primarily": 3952, - "bottom": 3953, - "owner": 3954, - "1970s": 3955, - "israel": 3956, - "gives": 3957, - "jim": 3958, - "dream": 3959, - "bob": 3960, - "remain": 3961, - "spot": 3962, - "70": 3963, - "notes": 3964, - "produce": 3965, - "champions": 3966, - "contact": 3967, - "ed": 3968, - "soul": 3969, - "accepted": 3970, - "ways": 3971, - "del": 3972, - "##ally": 3973, - "losing": 3974, - "split": 3975, - "price": 3976, - "capacity": 3977, - "basis": 3978, - "trial": 3979, - "questions": 3980, - "##ina": 3981, - "1955": 3982, - "20th": 3983, - "guess": 3984, - "officially": 3985, - "memorial": 3986, - "naval": 3987, - "initial": 3988, - "##ization": 3989, - "whispered": 3990, - "median": 3991, - "engineer": 3992, - "##ful": 3993, - "sydney": 3994, - "##go": 3995, - "columbia": 3996, - "strength": 3997, - "300": 3998, - "1952": 3999, - "tears": 4000, - "senate": 4001, - "00": 4002, - "card": 4003, - "asian": 4004, - "agent": 4005, - "1947": 4006, - "software": 4007, - "44": 4008, - "draw": 4009, - "warm": 4010, - "supposed": 4011, - "com": 4012, - "pro": 4013, - "##il": 4014, - "transferred": 4015, - "leaned": 4016, - "##at": 4017, - "candidate": 4018, - "escape": 4019, - "mountains": 4020, - "asia": 4021, - "potential": 4022, - "activity": 4023, - "entertainment": 4024, - "seem": 4025, - "traffic": 4026, - "jackson": 4027, - "murder": 4028, - "36": 4029, - "slow": 4030, - "product": 4031, - "orchestra": 4032, - "haven": 4033, - "agency": 4034, - "bbc": 4035, - "taught": 4036, - "website": 4037, - "comedy": 4038, - "unable": 4039, - "storm": 4040, - "planning": 4041, - "albums": 4042, - "rugby": 4043, - "environment": 4044, - "scientific": 4045, - "grabbed": 4046, - "protect": 4047, - "##hi": 4048, - "boat": 4049, - "typically": 4050, - "1954": 4051, - "1953": 4052, - "damage": 4053, - "principal": 4054, - "divided": 4055, - "dedicated": 4056, - "mount": 4057, - "ohio": 4058, - "##berg": 4059, - "pick": 4060, - "fought": 4061, - "driver": 4062, - "##der": 4063, - "empty": 4064, - "shoulders": 4065, - "sort": 4066, - "thank": 4067, - "berlin": 4068, - "prominent": 4069, - "account": 4070, - "freedom": 4071, - "necessary": 4072, - "efforts": 4073, - "alex": 4074, - "headquarters": 4075, - "follows": 4076, - "alongside": 4077, - "des": 4078, - "simon": 4079, - "andrew": 4080, - "suggested": 4081, - "operating": 4082, - "learning": 4083, - "steps": 4084, - "1949": 4085, - "sweet": 4086, - "technical": 4087, - "begin": 4088, - "easily": 4089, - "34": 4090, - "teeth": 4091, - "speaking": 4092, - "settlement": 4093, - "scale": 4094, - "##sh": 4095, - "renamed": 4096, - "ray": 4097, - "max": 4098, - "enemy": 4099, - "semi": 4100, - "joint": 4101, - "compared": 4102, - "##rd": 4103, - "scottish": 4104, - "leadership": 4105, - "analysis": 4106, - "offers": 4107, - "georgia": 4108, - "pieces": 4109, - "captured": 4110, - "animal": 4111, - "deputy": 4112, - "guest": 4113, - "organized": 4114, - "##lin": 4115, - "tony": 4116, - "combined": 4117, - "method": 4118, - "challenge": 4119, - "1960s": 4120, - "huge": 
4121, - "wants": 4122, - "battalion": 4123, - "sons": 4124, - "rise": 4125, - "crime": 4126, - "types": 4127, - "facilities": 4128, - "telling": 4129, - "path": 4130, - "1951": 4131, - "platform": 4132, - "sit": 4133, - "1990s": 4134, - "##lo": 4135, - "tells": 4136, - "assigned": 4137, - "rich": 4138, - "pull": 4139, - "##ot": 4140, - "commonly": 4141, - "alive": 4142, - "##za": 4143, - "letters": 4144, - "concept": 4145, - "conducted": 4146, - "wearing": 4147, - "happen": 4148, - "bought": 4149, - "becomes": 4150, - "holy": 4151, - "gets": 4152, - "ocean": 4153, - "defeat": 4154, - "languages": 4155, - "purchased": 4156, - "coffee": 4157, - "occurred": 4158, - "titled": 4159, - "##q": 4160, - "declared": 4161, - "applied": 4162, - "sciences": 4163, - "concert": 4164, - "sounds": 4165, - "jazz": 4166, - "brain": 4167, - "##me": 4168, - "painting": 4169, - "fleet": 4170, - "tax": 4171, - "nick": 4172, - "##ius": 4173, - "michigan": 4174, - "count": 4175, - "animals": 4176, - "leaders": 4177, - "episodes": 4178, - "##line": 4179, - "content": 4180, - "##den": 4181, - "birth": 4182, - "##it": 4183, - "clubs": 4184, - "64": 4185, - "palace": 4186, - "critical": 4187, - "refused": 4188, - "fair": 4189, - "leg": 4190, - "laughed": 4191, - "returning": 4192, - "surrounding": 4193, - "participated": 4194, - "formation": 4195, - "lifted": 4196, - "pointed": 4197, - "connected": 4198, - "rome": 4199, - "medicine": 4200, - "laid": 4201, - "taylor": 4202, - "santa": 4203, - "powers": 4204, - "adam": 4205, - "tall": 4206, - "shared": 4207, - "focused": 4208, - "knowing": 4209, - "yards": 4210, - "entrance": 4211, - "falls": 4212, - "##wa": 4213, - "calling": 4214, - "##ad": 4215, - "sources": 4216, - "chosen": 4217, - "beneath": 4218, - "resources": 4219, - "yard": 4220, - "##ite": 4221, - "nominated": 4222, - "silence": 4223, - "zone": 4224, - "defined": 4225, - "##que": 4226, - "gained": 4227, - "thirty": 4228, - "38": 4229, - "bodies": 4230, - "moon": 4231, - "##ard": 4232, - "adopted": 4233, - "christmas": 4234, - "widely": 4235, - "register": 4236, - "apart": 4237, - "iran": 4238, - "premier": 4239, - "serves": 4240, - "du": 4241, - "unknown": 4242, - "parties": 4243, - "##les": 4244, - "generation": 4245, - "##ff": 4246, - "continues": 4247, - "quick": 4248, - "fields": 4249, - "brigade": 4250, - "quiet": 4251, - "teaching": 4252, - "clothes": 4253, - "impact": 4254, - "weapons": 4255, - "partner": 4256, - "flat": 4257, - "theater": 4258, - "supreme": 4259, - "1938": 4260, - "37": 4261, - "relations": 4262, - "##tor": 4263, - "plants": 4264, - "suffered": 4265, - "1936": 4266, - "wilson": 4267, - "kids": 4268, - "begins": 4269, - "##age": 4270, - "1918": 4271, - "seats": 4272, - "armed": 4273, - "internet": 4274, - "models": 4275, - "worth": 4276, - "laws": 4277, - "400": 4278, - "communities": 4279, - "classes": 4280, - "background": 4281, - "knows": 4282, - "thanks": 4283, - "quarter": 4284, - "reaching": 4285, - "humans": 4286, - "carry": 4287, - "killing": 4288, - "format": 4289, - "kong": 4290, - "hong": 4291, - "setting": 4292, - "75": 4293, - "architecture": 4294, - "disease": 4295, - "railroad": 4296, - "inc": 4297, - "possibly": 4298, - "wish": 4299, - "arthur": 4300, - "thoughts": 4301, - "harry": 4302, - "doors": 4303, - "density": 4304, - "##di": 4305, - "crowd": 4306, - "illinois": 4307, - "stomach": 4308, - "tone": 4309, - "unique": 4310, - "reports": 4311, - "anyway": 4312, - "##ir": 4313, - "liberal": 4314, - "der": 4315, - "vehicle": 4316, - "thick": 4317, - "dry": 4318, - 
"drug": 4319, - "faced": 4320, - "largely": 4321, - "facility": 4322, - "theme": 4323, - "holds": 4324, - "creation": 4325, - "strange": 4326, - "colonel": 4327, - "##mi": 4328, - "revolution": 4329, - "bell": 4330, - "politics": 4331, - "turns": 4332, - "silent": 4333, - "rail": 4334, - "relief": 4335, - "independence": 4336, - "combat": 4337, - "shape": 4338, - "write": 4339, - "determined": 4340, - "sales": 4341, - "learned": 4342, - "4th": 4343, - "finger": 4344, - "oxford": 4345, - "providing": 4346, - "1937": 4347, - "heritage": 4348, - "fiction": 4349, - "situated": 4350, - "designated": 4351, - "allowing": 4352, - "distribution": 4353, - "hosted": 4354, - "##est": 4355, - "sight": 4356, - "interview": 4357, - "estimated": 4358, - "reduced": 4359, - "##ria": 4360, - "toronto": 4361, - "footballer": 4362, - "keeping": 4363, - "guys": 4364, - "damn": 4365, - "claim": 4366, - "motion": 4367, - "sport": 4368, - "sixth": 4369, - "stayed": 4370, - "##ze": 4371, - "en": 4372, - "rear": 4373, - "receive": 4374, - "handed": 4375, - "twelve": 4376, - "dress": 4377, - "audience": 4378, - "granted": 4379, - "brazil": 4380, - "##well": 4381, - "spirit": 4382, - "##ated": 4383, - "noticed": 4384, - "etc": 4385, - "olympic": 4386, - "representative": 4387, - "eric": 4388, - "tight": 4389, - "trouble": 4390, - "reviews": 4391, - "drink": 4392, - "vampire": 4393, - "missing": 4394, - "roles": 4395, - "ranked": 4396, - "newly": 4397, - "household": 4398, - "finals": 4399, - "wave": 4400, - "critics": 4401, - "##ee": 4402, - "phase": 4403, - "massachusetts": 4404, - "pilot": 4405, - "unlike": 4406, - "philadelphia": 4407, - "bright": 4408, - "guns": 4409, - "crown": 4410, - "organizations": 4411, - "roof": 4412, - "42": 4413, - "respectively": 4414, - "clearly": 4415, - "tongue": 4416, - "marked": 4417, - "circle": 4418, - "fox": 4419, - "korea": 4420, - "bronze": 4421, - "brian": 4422, - "expanded": 4423, - "sexual": 4424, - "supply": 4425, - "yourself": 4426, - "inspired": 4427, - "labour": 4428, - "fc": 4429, - "##ah": 4430, - "reference": 4431, - "vision": 4432, - "draft": 4433, - "connection": 4434, - "brand": 4435, - "reasons": 4436, - "1935": 4437, - "classic": 4438, - "driving": 4439, - "trip": 4440, - "jesus": 4441, - "cells": 4442, - "entry": 4443, - "1920": 4444, - "neither": 4445, - "trail": 4446, - "claims": 4447, - "atlantic": 4448, - "orders": 4449, - "labor": 4450, - "nose": 4451, - "afraid": 4452, - "identified": 4453, - "intelligence": 4454, - "calls": 4455, - "cancer": 4456, - "attacked": 4457, - "passing": 4458, - "stephen": 4459, - "positions": 4460, - "imperial": 4461, - "grey": 4462, - "jason": 4463, - "39": 4464, - "sunday": 4465, - "48": 4466, - "swedish": 4467, - "avoid": 4468, - "extra": 4469, - "uncle": 4470, - "message": 4471, - "covers": 4472, - "allows": 4473, - "surprise": 4474, - "materials": 4475, - "fame": 4476, - "hunter": 4477, - "##ji": 4478, - "1930": 4479, - "citizens": 4480, - "figures": 4481, - "davis": 4482, - "environmental": 4483, - "confirmed": 4484, - "shit": 4485, - "titles": 4486, - "di": 4487, - "performing": 4488, - "difference": 4489, - "acts": 4490, - "attacks": 4491, - "##ov": 4492, - "existing": 4493, - "votes": 4494, - "opportunity": 4495, - "nor": 4496, - "shop": 4497, - "entirely": 4498, - "trains": 4499, - "opposite": 4500, - "pakistan": 4501, - "##pa": 4502, - "develop": 4503, - "resulted": 4504, - "representatives": 4505, - "actions": 4506, - "reality": 4507, - "pressed": 4508, - "##ish": 4509, - "barely": 4510, - "wine": 4511, - 
"conversation": 4512, - "faculty": 4513, - "northwest": 4514, - "ends": 4515, - "documentary": 4516, - "nuclear": 4517, - "stock": 4518, - "grace": 4519, - "sets": 4520, - "eat": 4521, - "alternative": 4522, - "##ps": 4523, - "bag": 4524, - "resulting": 4525, - "creating": 4526, - "surprised": 4527, - "cemetery": 4528, - "1919": 4529, - "drop": 4530, - "finding": 4531, - "sarah": 4532, - "cricket": 4533, - "streets": 4534, - "tradition": 4535, - "ride": 4536, - "1933": 4537, - "exhibition": 4538, - "target": 4539, - "ear": 4540, - "explained": 4541, - "rain": 4542, - "composer": 4543, - "injury": 4544, - "apartment": 4545, - "municipal": 4546, - "educational": 4547, - "occupied": 4548, - "netherlands": 4549, - "clean": 4550, - "billion": 4551, - "constitution": 4552, - "learn": 4553, - "1914": 4554, - "maximum": 4555, - "classical": 4556, - "francis": 4557, - "lose": 4558, - "opposition": 4559, - "jose": 4560, - "ontario": 4561, - "bear": 4562, - "core": 4563, - "hills": 4564, - "rolled": 4565, - "ending": 4566, - "drawn": 4567, - "permanent": 4568, - "fun": 4569, - "##tes": 4570, - "##lla": 4571, - "lewis": 4572, - "sites": 4573, - "chamber": 4574, - "ryan": 4575, - "##way": 4576, - "scoring": 4577, - "height": 4578, - "1934": 4579, - "##house": 4580, - "lyrics": 4581, - "staring": 4582, - "55": 4583, - "officials": 4584, - "1917": 4585, - "snow": 4586, - "oldest": 4587, - "##tic": 4588, - "orange": 4589, - "##ger": 4590, - "qualified": 4591, - "interior": 4592, - "apparently": 4593, - "succeeded": 4594, - "thousand": 4595, - "dinner": 4596, - "lights": 4597, - "existence": 4598, - "fans": 4599, - "heavily": 4600, - "41": 4601, - "greatest": 4602, - "conservative": 4603, - "send": 4604, - "bowl": 4605, - "plus": 4606, - "enter": 4607, - "catch": 4608, - "##un": 4609, - "economy": 4610, - "duty": 4611, - "1929": 4612, - "speech": 4613, - "authorities": 4614, - "princess": 4615, - "performances": 4616, - "versions": 4617, - "shall": 4618, - "graduate": 4619, - "pictures": 4620, - "effective": 4621, - "remembered": 4622, - "poetry": 4623, - "desk": 4624, - "crossed": 4625, - "starring": 4626, - "starts": 4627, - "passenger": 4628, - "sharp": 4629, - "##ant": 4630, - "acres": 4631, - "ass": 4632, - "weather": 4633, - "falling": 4634, - "rank": 4635, - "fund": 4636, - "supporting": 4637, - "check": 4638, - "adult": 4639, - "publishing": 4640, - "heads": 4641, - "cm": 4642, - "southeast": 4643, - "lane": 4644, - "##burg": 4645, - "application": 4646, - "bc": 4647, - "##ura": 4648, - "les": 4649, - "condition": 4650, - "transfer": 4651, - "prevent": 4652, - "display": 4653, - "ex": 4654, - "regions": 4655, - "earl": 4656, - "federation": 4657, - "cool": 4658, - "relatively": 4659, - "answered": 4660, - "besides": 4661, - "1928": 4662, - "obtained": 4663, - "portion": 4664, - "##town": 4665, - "mix": 4666, - "##ding": 4667, - "reaction": 4668, - "liked": 4669, - "dean": 4670, - "express": 4671, - "peak": 4672, - "1932": 4673, - "##tte": 4674, - "counter": 4675, - "religion": 4676, - "chain": 4677, - "rare": 4678, - "miller": 4679, - "convention": 4680, - "aid": 4681, - "lie": 4682, - "vehicles": 4683, - "mobile": 4684, - "perform": 4685, - "squad": 4686, - "wonder": 4687, - "lying": 4688, - "crazy": 4689, - "sword": 4690, - "##ping": 4691, - "attempted": 4692, - "centuries": 4693, - "weren": 4694, - "philosophy": 4695, - "category": 4696, - "##ize": 4697, - "anna": 4698, - "interested": 4699, - "47": 4700, - "sweden": 4701, - "wolf": 4702, - "frequently": 4703, - "abandoned": 4704, - "kg": 4705, 
- "literary": 4706, - "alliance": 4707, - "task": 4708, - "entitled": 4709, - "##ay": 4710, - "threw": 4711, - "promotion": 4712, - "factory": 4713, - "tiny": 4714, - "soccer": 4715, - "visited": 4716, - "matt": 4717, - "fm": 4718, - "achieved": 4719, - "52": 4720, - "defence": 4721, - "internal": 4722, - "persian": 4723, - "43": 4724, - "methods": 4725, - "##ging": 4726, - "arrested": 4727, - "otherwise": 4728, - "cambridge": 4729, - "programming": 4730, - "villages": 4731, - "elementary": 4732, - "districts": 4733, - "rooms": 4734, - "criminal": 4735, - "conflict": 4736, - "worry": 4737, - "trained": 4738, - "1931": 4739, - "attempts": 4740, - "waited": 4741, - "signal": 4742, - "bird": 4743, - "truck": 4744, - "subsequent": 4745, - "programme": 4746, - "##ol": 4747, - "ad": 4748, - "49": 4749, - "communist": 4750, - "details": 4751, - "faith": 4752, - "sector": 4753, - "patrick": 4754, - "carrying": 4755, - "laugh": 4756, - "##ss": 4757, - "controlled": 4758, - "korean": 4759, - "showing": 4760, - "origin": 4761, - "fuel": 4762, - "evil": 4763, - "1927": 4764, - "##ent": 4765, - "brief": 4766, - "identity": 4767, - "darkness": 4768, - "address": 4769, - "pool": 4770, - "missed": 4771, - "publication": 4772, - "web": 4773, - "planet": 4774, - "ian": 4775, - "anne": 4776, - "wings": 4777, - "invited": 4778, - "##tt": 4779, - "briefly": 4780, - "standards": 4781, - "kissed": 4782, - "##be": 4783, - "ideas": 4784, - "climate": 4785, - "causing": 4786, - "walter": 4787, - "worse": 4788, - "albert": 4789, - "articles": 4790, - "winners": 4791, - "desire": 4792, - "aged": 4793, - "northeast": 4794, - "dangerous": 4795, - "gate": 4796, - "doubt": 4797, - "1922": 4798, - "wooden": 4799, - "multi": 4800, - "##ky": 4801, - "poet": 4802, - "rising": 4803, - "funding": 4804, - "46": 4805, - "communications": 4806, - "communication": 4807, - "violence": 4808, - "copies": 4809, - "prepared": 4810, - "ford": 4811, - "investigation": 4812, - "skills": 4813, - "1924": 4814, - "pulling": 4815, - "electronic": 4816, - "##ak": 4817, - "##ial": 4818, - "##han": 4819, - "containing": 4820, - "ultimately": 4821, - "offices": 4822, - "singing": 4823, - "understanding": 4824, - "restaurant": 4825, - "tomorrow": 4826, - "fashion": 4827, - "christ": 4828, - "ward": 4829, - "da": 4830, - "pope": 4831, - "stands": 4832, - "5th": 4833, - "flow": 4834, - "studios": 4835, - "aired": 4836, - "commissioned": 4837, - "contained": 4838, - "exist": 4839, - "fresh": 4840, - "americans": 4841, - "##per": 4842, - "wrestling": 4843, - "approved": 4844, - "kid": 4845, - "employed": 4846, - "respect": 4847, - "suit": 4848, - "1925": 4849, - "angel": 4850, - "asking": 4851, - "increasing": 4852, - "frame": 4853, - "angry": 4854, - "selling": 4855, - "1950s": 4856, - "thin": 4857, - "finds": 4858, - "##nd": 4859, - "temperature": 4860, - "statement": 4861, - "ali": 4862, - "explain": 4863, - "inhabitants": 4864, - "towns": 4865, - "extensive": 4866, - "narrow": 4867, - "51": 4868, - "jane": 4869, - "flowers": 4870, - "images": 4871, - "promise": 4872, - "somewhere": 4873, - "object": 4874, - "fly": 4875, - "closely": 4876, - "##ls": 4877, - "1912": 4878, - "bureau": 4879, - "cape": 4880, - "1926": 4881, - "weekly": 4882, - "presidential": 4883, - "legislative": 4884, - "1921": 4885, - "##ai": 4886, - "##au": 4887, - "launch": 4888, - "founding": 4889, - "##ny": 4890, - "978": 4891, - "##ring": 4892, - "artillery": 4893, - "strike": 4894, - "un": 4895, - "institutions": 4896, - "roll": 4897, - "writers": 4898, - "landing": 4899, - 
"chose": 4900, - "kevin": 4901, - "anymore": 4902, - "pp": 4903, - "##ut": 4904, - "attorney": 4905, - "fit": 4906, - "dan": 4907, - "billboard": 4908, - "receiving": 4909, - "agricultural": 4910, - "breaking": 4911, - "sought": 4912, - "dave": 4913, - "admitted": 4914, - "lands": 4915, - "mexican": 4916, - "##bury": 4917, - "charlie": 4918, - "specifically": 4919, - "hole": 4920, - "iv": 4921, - "howard": 4922, - "credit": 4923, - "moscow": 4924, - "roads": 4925, - "accident": 4926, - "1923": 4927, - "proved": 4928, - "wear": 4929, - "struck": 4930, - "hey": 4931, - "guards": 4932, - "stuff": 4933, - "slid": 4934, - "expansion": 4935, - "1915": 4936, - "cat": 4937, - "anthony": 4938, - "##kin": 4939, - "melbourne": 4940, - "opposed": 4941, - "sub": 4942, - "southwest": 4943, - "architect": 4944, - "failure": 4945, - "plane": 4946, - "1916": 4947, - "##ron": 4948, - "map": 4949, - "camera": 4950, - "tank": 4951, - "listen": 4952, - "regarding": 4953, - "wet": 4954, - "introduction": 4955, - "metropolitan": 4956, - "link": 4957, - "ep": 4958, - "fighter": 4959, - "inch": 4960, - "grown": 4961, - "gene": 4962, - "anger": 4963, - "fixed": 4964, - "buy": 4965, - "dvd": 4966, - "khan": 4967, - "domestic": 4968, - "worldwide": 4969, - "chapel": 4970, - "mill": 4971, - "functions": 4972, - "examples": 4973, - "##head": 4974, - "developing": 4975, - "1910": 4976, - "turkey": 4977, - "hits": 4978, - "pocket": 4979, - "antonio": 4980, - "papers": 4981, - "grow": 4982, - "unless": 4983, - "circuit": 4984, - "18th": 4985, - "concerned": 4986, - "attached": 4987, - "journalist": 4988, - "selection": 4989, - "journey": 4990, - "converted": 4991, - "provincial": 4992, - "painted": 4993, - "hearing": 4994, - "aren": 4995, - "bands": 4996, - "negative": 4997, - "aside": 4998, - "wondered": 4999, - "knight": 5000, - "lap": 5001, - "survey": 5002, - "ma": 5003, - "##ow": 5004, - "noise": 5005, - "billy": 5006, - "##ium": 5007, - "shooting": 5008, - "guide": 5009, - "bedroom": 5010, - "priest": 5011, - "resistance": 5012, - "motor": 5013, - "homes": 5014, - "sounded": 5015, - "giant": 5016, - "##mer": 5017, - "150": 5018, - "scenes": 5019, - "equal": 5020, - "comic": 5021, - "patients": 5022, - "hidden": 5023, - "solid": 5024, - "actual": 5025, - "bringing": 5026, - "afternoon": 5027, - "touched": 5028, - "funds": 5029, - "wedding": 5030, - "consisted": 5031, - "marie": 5032, - "canal": 5033, - "sr": 5034, - "kim": 5035, - "treaty": 5036, - "turkish": 5037, - "recognition": 5038, - "residence": 5039, - "cathedral": 5040, - "broad": 5041, - "knees": 5042, - "incident": 5043, - "shaped": 5044, - "fired": 5045, - "norwegian": 5046, - "handle": 5047, - "cheek": 5048, - "contest": 5049, - "represent": 5050, - "##pe": 5051, - "representing": 5052, - "beauty": 5053, - "##sen": 5054, - "birds": 5055, - "advantage": 5056, - "emergency": 5057, - "wrapped": 5058, - "drawing": 5059, - "notice": 5060, - "pink": 5061, - "broadcasting": 5062, - "##ong": 5063, - "somehow": 5064, - "bachelor": 5065, - "seventh": 5066, - "collected": 5067, - "registered": 5068, - "establishment": 5069, - "alan": 5070, - "assumed": 5071, - "chemical": 5072, - "personnel": 5073, - "roger": 5074, - "retirement": 5075, - "jeff": 5076, - "portuguese": 5077, - "wore": 5078, - "tied": 5079, - "device": 5080, - "threat": 5081, - "progress": 5082, - "advance": 5083, - "##ised": 5084, - "banks": 5085, - "hired": 5086, - "manchester": 5087, - "nfl": 5088, - "teachers": 5089, - "structures": 5090, - "forever": 5091, - "##bo": 5092, - "tennis": 5093, - 
"helping": 5094, - "saturday": 5095, - "sale": 5096, - "applications": 5097, - "junction": 5098, - "hip": 5099, - "incorporated": 5100, - "neighborhood": 5101, - "dressed": 5102, - "ceremony": 5103, - "##ds": 5104, - "influenced": 5105, - "hers": 5106, - "visual": 5107, - "stairs": 5108, - "decades": 5109, - "inner": 5110, - "kansas": 5111, - "hung": 5112, - "hoped": 5113, - "gain": 5114, - "scheduled": 5115, - "downtown": 5116, - "engaged": 5117, - "austria": 5118, - "clock": 5119, - "norway": 5120, - "certainly": 5121, - "pale": 5122, - "protected": 5123, - "1913": 5124, - "victor": 5125, - "employees": 5126, - "plate": 5127, - "putting": 5128, - "surrounded": 5129, - "##ists": 5130, - "finishing": 5131, - "blues": 5132, - "tropical": 5133, - "##ries": 5134, - "minnesota": 5135, - "consider": 5136, - "philippines": 5137, - "accept": 5138, - "54": 5139, - "retrieved": 5140, - "1900": 5141, - "concern": 5142, - "anderson": 5143, - "properties": 5144, - "institution": 5145, - "gordon": 5146, - "successfully": 5147, - "vietnam": 5148, - "##dy": 5149, - "backing": 5150, - "outstanding": 5151, - "muslim": 5152, - "crossing": 5153, - "folk": 5154, - "producing": 5155, - "usual": 5156, - "demand": 5157, - "occurs": 5158, - "observed": 5159, - "lawyer": 5160, - "educated": 5161, - "##ana": 5162, - "kelly": 5163, - "string": 5164, - "pleasure": 5165, - "budget": 5166, - "items": 5167, - "quietly": 5168, - "colorado": 5169, - "philip": 5170, - "typical": 5171, - "##worth": 5172, - "derived": 5173, - "600": 5174, - "survived": 5175, - "asks": 5176, - "mental": 5177, - "##ide": 5178, - "56": 5179, - "jake": 5180, - "jews": 5181, - "distinguished": 5182, - "ltd": 5183, - "1911": 5184, - "sri": 5185, - "extremely": 5186, - "53": 5187, - "athletic": 5188, - "loud": 5189, - "thousands": 5190, - "worried": 5191, - "shadow": 5192, - "transportation": 5193, - "horses": 5194, - "weapon": 5195, - "arena": 5196, - "importance": 5197, - "users": 5198, - "tim": 5199, - "objects": 5200, - "contributed": 5201, - "dragon": 5202, - "douglas": 5203, - "aware": 5204, - "senator": 5205, - "johnny": 5206, - "jordan": 5207, - "sisters": 5208, - "engines": 5209, - "flag": 5210, - "investment": 5211, - "samuel": 5212, - "shock": 5213, - "capable": 5214, - "clark": 5215, - "row": 5216, - "wheel": 5217, - "refers": 5218, - "session": 5219, - "familiar": 5220, - "biggest": 5221, - "wins": 5222, - "hate": 5223, - "maintained": 5224, - "drove": 5225, - "hamilton": 5226, - "request": 5227, - "expressed": 5228, - "injured": 5229, - "underground": 5230, - "churches": 5231, - "walker": 5232, - "wars": 5233, - "tunnel": 5234, - "passes": 5235, - "stupid": 5236, - "agriculture": 5237, - "softly": 5238, - "cabinet": 5239, - "regarded": 5240, - "joining": 5241, - "indiana": 5242, - "##ea": 5243, - "##ms": 5244, - "push": 5245, - "dates": 5246, - "spend": 5247, - "behavior": 5248, - "woods": 5249, - "protein": 5250, - "gently": 5251, - "chase": 5252, - "morgan": 5253, - "mention": 5254, - "burning": 5255, - "wake": 5256, - "combination": 5257, - "occur": 5258, - "mirror": 5259, - "leads": 5260, - "jimmy": 5261, - "indeed": 5262, - "impossible": 5263, - "singapore": 5264, - "paintings": 5265, - "covering": 5266, - "##nes": 5267, - "soldier": 5268, - "locations": 5269, - "attendance": 5270, - "sell": 5271, - "historian": 5272, - "wisconsin": 5273, - "invasion": 5274, - "argued": 5275, - "painter": 5276, - "diego": 5277, - "changing": 5278, - "egypt": 5279, - "##don": 5280, - "experienced": 5281, - "inches": 5282, - "##ku": 5283, - 
"missouri": 5284, - "vol": 5285, - "grounds": 5286, - "spoken": 5287, - "switzerland": 5288, - "##gan": 5289, - "reform": 5290, - "rolling": 5291, - "ha": 5292, - "forget": 5293, - "massive": 5294, - "resigned": 5295, - "burned": 5296, - "allen": 5297, - "tennessee": 5298, - "locked": 5299, - "values": 5300, - "improved": 5301, - "##mo": 5302, - "wounded": 5303, - "universe": 5304, - "sick": 5305, - "dating": 5306, - "facing": 5307, - "pack": 5308, - "purchase": 5309, - "user": 5310, - "##pur": 5311, - "moments": 5312, - "##ul": 5313, - "merged": 5314, - "anniversary": 5315, - "1908": 5316, - "coal": 5317, - "brick": 5318, - "understood": 5319, - "causes": 5320, - "dynasty": 5321, - "queensland": 5322, - "establish": 5323, - "stores": 5324, - "crisis": 5325, - "promote": 5326, - "hoping": 5327, - "views": 5328, - "cards": 5329, - "referee": 5330, - "extension": 5331, - "##si": 5332, - "raise": 5333, - "arizona": 5334, - "improve": 5335, - "colonial": 5336, - "formal": 5337, - "charged": 5338, - "##rt": 5339, - "palm": 5340, - "lucky": 5341, - "hide": 5342, - "rescue": 5343, - "faces": 5344, - "95": 5345, - "feelings": 5346, - "candidates": 5347, - "juan": 5348, - "##ell": 5349, - "goods": 5350, - "6th": 5351, - "courses": 5352, - "weekend": 5353, - "59": 5354, - "luke": 5355, - "cash": 5356, - "fallen": 5357, - "##om": 5358, - "delivered": 5359, - "affected": 5360, - "installed": 5361, - "carefully": 5362, - "tries": 5363, - "swiss": 5364, - "hollywood": 5365, - "costs": 5366, - "lincoln": 5367, - "responsibility": 5368, - "##he": 5369, - "shore": 5370, - "file": 5371, - "proper": 5372, - "normally": 5373, - "maryland": 5374, - "assistance": 5375, - "jump": 5376, - "constant": 5377, - "offering": 5378, - "friendly": 5379, - "waters": 5380, - "persons": 5381, - "realize": 5382, - "contain": 5383, - "trophy": 5384, - "800": 5385, - "partnership": 5386, - "factor": 5387, - "58": 5388, - "musicians": 5389, - "cry": 5390, - "bound": 5391, - "oregon": 5392, - "indicated": 5393, - "hero": 5394, - "houston": 5395, - "medium": 5396, - "##ure": 5397, - "consisting": 5398, - "somewhat": 5399, - "##ara": 5400, - "57": 5401, - "cycle": 5402, - "##che": 5403, - "beer": 5404, - "moore": 5405, - "frederick": 5406, - "gotten": 5407, - "eleven": 5408, - "worst": 5409, - "weak": 5410, - "approached": 5411, - "arranged": 5412, - "chin": 5413, - "loan": 5414, - "universal": 5415, - "bond": 5416, - "fifteen": 5417, - "pattern": 5418, - "disappeared": 5419, - "##ney": 5420, - "translated": 5421, - "##zed": 5422, - "lip": 5423, - "arab": 5424, - "capture": 5425, - "interests": 5426, - "insurance": 5427, - "##chi": 5428, - "shifted": 5429, - "cave": 5430, - "prix": 5431, - "warning": 5432, - "sections": 5433, - "courts": 5434, - "coat": 5435, - "plot": 5436, - "smell": 5437, - "feed": 5438, - "golf": 5439, - "favorite": 5440, - "maintain": 5441, - "knife": 5442, - "vs": 5443, - "voted": 5444, - "degrees": 5445, - "finance": 5446, - "quebec": 5447, - "opinion": 5448, - "translation": 5449, - "manner": 5450, - "ruled": 5451, - "operate": 5452, - "productions": 5453, - "choose": 5454, - "musician": 5455, - "discovery": 5456, - "confused": 5457, - "tired": 5458, - "separated": 5459, - "stream": 5460, - "techniques": 5461, - "committed": 5462, - "attend": 5463, - "ranking": 5464, - "kings": 5465, - "throw": 5466, - "passengers": 5467, - "measure": 5468, - "horror": 5469, - "fan": 5470, - "mining": 5471, - "sand": 5472, - "danger": 5473, - "salt": 5474, - "calm": 5475, - "decade": 5476, - "dam": 5477, - "require": 5478, 
- "runner": 5479, - "##ik": 5480, - "rush": 5481, - "associate": 5482, - "greece": 5483, - "##ker": 5484, - "rivers": 5485, - "consecutive": 5486, - "matthew": 5487, - "##ski": 5488, - "sighed": 5489, - "sq": 5490, - "documents": 5491, - "steam": 5492, - "edited": 5493, - "closing": 5494, - "tie": 5495, - "accused": 5496, - "1905": 5497, - "##ini": 5498, - "islamic": 5499, - "distributed": 5500, - "directors": 5501, - "organisation": 5502, - "bruce": 5503, - "7th": 5504, - "breathing": 5505, - "mad": 5506, - "lit": 5507, - "arrival": 5508, - "concrete": 5509, - "taste": 5510, - "08": 5511, - "composition": 5512, - "shaking": 5513, - "faster": 5514, - "amateur": 5515, - "adjacent": 5516, - "stating": 5517, - "1906": 5518, - "twin": 5519, - "flew": 5520, - "##ran": 5521, - "tokyo": 5522, - "publications": 5523, - "##tone": 5524, - "obviously": 5525, - "ridge": 5526, - "storage": 5527, - "1907": 5528, - "carl": 5529, - "pages": 5530, - "concluded": 5531, - "desert": 5532, - "driven": 5533, - "universities": 5534, - "ages": 5535, - "terminal": 5536, - "sequence": 5537, - "borough": 5538, - "250": 5539, - "constituency": 5540, - "creative": 5541, - "cousin": 5542, - "economics": 5543, - "dreams": 5544, - "margaret": 5545, - "notably": 5546, - "reduce": 5547, - "montreal": 5548, - "mode": 5549, - "17th": 5550, - "ears": 5551, - "saved": 5552, - "jan": 5553, - "vocal": 5554, - "##ica": 5555, - "1909": 5556, - "andy": 5557, - "##jo": 5558, - "riding": 5559, - "roughly": 5560, - "threatened": 5561, - "##ise": 5562, - "meters": 5563, - "meanwhile": 5564, - "landed": 5565, - "compete": 5566, - "repeated": 5567, - "grass": 5568, - "czech": 5569, - "regularly": 5570, - "charges": 5571, - "tea": 5572, - "sudden": 5573, - "appeal": 5574, - "##ung": 5575, - "solution": 5576, - "describes": 5577, - "pierre": 5578, - "classification": 5579, - "glad": 5580, - "parking": 5581, - "##ning": 5582, - "belt": 5583, - "physics": 5584, - "99": 5585, - "rachel": 5586, - "add": 5587, - "hungarian": 5588, - "participate": 5589, - "expedition": 5590, - "damaged": 5591, - "gift": 5592, - "childhood": 5593, - "85": 5594, - "fifty": 5595, - "##red": 5596, - "mathematics": 5597, - "jumped": 5598, - "letting": 5599, - "defensive": 5600, - "mph": 5601, - "##ux": 5602, - "##gh": 5603, - "testing": 5604, - "##hip": 5605, - "hundreds": 5606, - "shoot": 5607, - "owners": 5608, - "matters": 5609, - "smoke": 5610, - "israeli": 5611, - "kentucky": 5612, - "dancing": 5613, - "mounted": 5614, - "grandfather": 5615, - "emma": 5616, - "designs": 5617, - "profit": 5618, - "argentina": 5619, - "##gs": 5620, - "truly": 5621, - "li": 5622, - "lawrence": 5623, - "cole": 5624, - "begun": 5625, - "detroit": 5626, - "willing": 5627, - "branches": 5628, - "smiling": 5629, - "decide": 5630, - "miami": 5631, - "enjoyed": 5632, - "recordings": 5633, - "##dale": 5634, - "poverty": 5635, - "ethnic": 5636, - "gay": 5637, - "##bi": 5638, - "gary": 5639, - "arabic": 5640, - "09": 5641, - "accompanied": 5642, - "##one": 5643, - "##ons": 5644, - "fishing": 5645, - "determine": 5646, - "residential": 5647, - "acid": 5648, - "##ary": 5649, - "alice": 5650, - "returns": 5651, - "starred": 5652, - "mail": 5653, - "##ang": 5654, - "jonathan": 5655, - "strategy": 5656, - "##ue": 5657, - "net": 5658, - "forty": 5659, - "cook": 5660, - "businesses": 5661, - "equivalent": 5662, - "commonwealth": 5663, - "distinct": 5664, - "ill": 5665, - "##cy": 5666, - "seriously": 5667, - "##ors": 5668, - "##ped": 5669, - "shift": 5670, - "harris": 5671, - "replace": 5672, - 
"rio": 5673, - "imagine": 5674, - "formula": 5675, - "ensure": 5676, - "##ber": 5677, - "additionally": 5678, - "scheme": 5679, - "conservation": 5680, - "occasionally": 5681, - "purposes": 5682, - "feels": 5683, - "favor": 5684, - "##and": 5685, - "##ore": 5686, - "1930s": 5687, - "contrast": 5688, - "hanging": 5689, - "hunt": 5690, - "movies": 5691, - "1904": 5692, - "instruments": 5693, - "victims": 5694, - "danish": 5695, - "christopher": 5696, - "busy": 5697, - "demon": 5698, - "sugar": 5699, - "earliest": 5700, - "colony": 5701, - "studying": 5702, - "balance": 5703, - "duties": 5704, - "##ks": 5705, - "belgium": 5706, - "slipped": 5707, - "carter": 5708, - "05": 5709, - "visible": 5710, - "stages": 5711, - "iraq": 5712, - "fifa": 5713, - "##im": 5714, - "commune": 5715, - "forming": 5716, - "zero": 5717, - "07": 5718, - "continuing": 5719, - "talked": 5720, - "counties": 5721, - "legend": 5722, - "bathroom": 5723, - "option": 5724, - "tail": 5725, - "clay": 5726, - "daughters": 5727, - "afterwards": 5728, - "severe": 5729, - "jaw": 5730, - "visitors": 5731, - "##ded": 5732, - "devices": 5733, - "aviation": 5734, - "russell": 5735, - "kate": 5736, - "##vi": 5737, - "entering": 5738, - "subjects": 5739, - "##ino": 5740, - "temporary": 5741, - "swimming": 5742, - "forth": 5743, - "smooth": 5744, - "ghost": 5745, - "audio": 5746, - "bush": 5747, - "operates": 5748, - "rocks": 5749, - "movements": 5750, - "signs": 5751, - "eddie": 5752, - "##tz": 5753, - "ann": 5754, - "voices": 5755, - "honorary": 5756, - "06": 5757, - "memories": 5758, - "dallas": 5759, - "pure": 5760, - "measures": 5761, - "racial": 5762, - "promised": 5763, - "66": 5764, - "harvard": 5765, - "ceo": 5766, - "16th": 5767, - "parliamentary": 5768, - "indicate": 5769, - "benefit": 5770, - "flesh": 5771, - "dublin": 5772, - "louisiana": 5773, - "1902": 5774, - "1901": 5775, - "patient": 5776, - "sleeping": 5777, - "1903": 5778, - "membership": 5779, - "coastal": 5780, - "medieval": 5781, - "wanting": 5782, - "element": 5783, - "scholars": 5784, - "rice": 5785, - "62": 5786, - "limit": 5787, - "survive": 5788, - "makeup": 5789, - "rating": 5790, - "definitely": 5791, - "collaboration": 5792, - "obvious": 5793, - "##tan": 5794, - "boss": 5795, - "ms": 5796, - "baron": 5797, - "birthday": 5798, - "linked": 5799, - "soil": 5800, - "diocese": 5801, - "##lan": 5802, - "ncaa": 5803, - "##mann": 5804, - "offensive": 5805, - "shell": 5806, - "shouldn": 5807, - "waist": 5808, - "##tus": 5809, - "plain": 5810, - "ross": 5811, - "organ": 5812, - "resolution": 5813, - "manufacturing": 5814, - "adding": 5815, - "relative": 5816, - "kennedy": 5817, - "98": 5818, - "whilst": 5819, - "moth": 5820, - "marketing": 5821, - "gardens": 5822, - "crash": 5823, - "72": 5824, - "heading": 5825, - "partners": 5826, - "credited": 5827, - "carlos": 5828, - "moves": 5829, - "cable": 5830, - "##zi": 5831, - "marshall": 5832, - "##out": 5833, - "depending": 5834, - "bottle": 5835, - "represents": 5836, - "rejected": 5837, - "responded": 5838, - "existed": 5839, - "04": 5840, - "jobs": 5841, - "denmark": 5842, - "lock": 5843, - "##ating": 5844, - "treated": 5845, - "graham": 5846, - "routes": 5847, - "talent": 5848, - "commissioner": 5849, - "drugs": 5850, - "secure": 5851, - "tests": 5852, - "reign": 5853, - "restored": 5854, - "photography": 5855, - "##gi": 5856, - "contributions": 5857, - "oklahoma": 5858, - "designer": 5859, - "disc": 5860, - "grin": 5861, - "seattle": 5862, - "robin": 5863, - "paused": 5864, - "atlanta": 5865, - "unusual": 5866, - 
"##gate": 5867, - "praised": 5868, - "las": 5869, - "laughing": 5870, - "satellite": 5871, - "hungary": 5872, - "visiting": 5873, - "##sky": 5874, - "interesting": 5875, - "factors": 5876, - "deck": 5877, - "poems": 5878, - "norman": 5879, - "##water": 5880, - "stuck": 5881, - "speaker": 5882, - "rifle": 5883, - "domain": 5884, - "premiered": 5885, - "##her": 5886, - "dc": 5887, - "comics": 5888, - "actors": 5889, - "01": 5890, - "reputation": 5891, - "eliminated": 5892, - "8th": 5893, - "ceiling": 5894, - "prisoners": 5895, - "script": 5896, - "##nce": 5897, - "leather": 5898, - "austin": 5899, - "mississippi": 5900, - "rapidly": 5901, - "admiral": 5902, - "parallel": 5903, - "charlotte": 5904, - "guilty": 5905, - "tools": 5906, - "gender": 5907, - "divisions": 5908, - "fruit": 5909, - "##bs": 5910, - "laboratory": 5911, - "nelson": 5912, - "fantasy": 5913, - "marry": 5914, - "rapid": 5915, - "aunt": 5916, - "tribe": 5917, - "requirements": 5918, - "aspects": 5919, - "suicide": 5920, - "amongst": 5921, - "adams": 5922, - "bone": 5923, - "ukraine": 5924, - "abc": 5925, - "kick": 5926, - "sees": 5927, - "edinburgh": 5928, - "clothing": 5929, - "column": 5930, - "rough": 5931, - "gods": 5932, - "hunting": 5933, - "broadway": 5934, - "gathered": 5935, - "concerns": 5936, - "##ek": 5937, - "spending": 5938, - "ty": 5939, - "12th": 5940, - "snapped": 5941, - "requires": 5942, - "solar": 5943, - "bones": 5944, - "cavalry": 5945, - "##tta": 5946, - "iowa": 5947, - "drinking": 5948, - "waste": 5949, - "index": 5950, - "franklin": 5951, - "charity": 5952, - "thompson": 5953, - "stewart": 5954, - "tip": 5955, - "flash": 5956, - "landscape": 5957, - "friday": 5958, - "enjoy": 5959, - "singh": 5960, - "poem": 5961, - "listening": 5962, - "##back": 5963, - "eighth": 5964, - "fred": 5965, - "differences": 5966, - "adapted": 5967, - "bomb": 5968, - "ukrainian": 5969, - "surgery": 5970, - "corporate": 5971, - "masters": 5972, - "anywhere": 5973, - "##more": 5974, - "waves": 5975, - "odd": 5976, - "sean": 5977, - "portugal": 5978, - "orleans": 5979, - "dick": 5980, - "debate": 5981, - "kent": 5982, - "eating": 5983, - "puerto": 5984, - "cleared": 5985, - "96": 5986, - "expect": 5987, - "cinema": 5988, - "97": 5989, - "guitarist": 5990, - "blocks": 5991, - "electrical": 5992, - "agree": 5993, - "involving": 5994, - "depth": 5995, - "dying": 5996, - "panel": 5997, - "struggle": 5998, - "##ged": 5999, - "peninsula": 6000, - "adults": 6001, - "novels": 6002, - "emerged": 6003, - "vienna": 6004, - "metro": 6005, - "debuted": 6006, - "shoes": 6007, - "tamil": 6008, - "songwriter": 6009, - "meets": 6010, - "prove": 6011, - "beating": 6012, - "instance": 6013, - "heaven": 6014, - "scared": 6015, - "sending": 6016, - "marks": 6017, - "artistic": 6018, - "passage": 6019, - "superior": 6020, - "03": 6021, - "significantly": 6022, - "shopping": 6023, - "##tive": 6024, - "retained": 6025, - "##izing": 6026, - "malaysia": 6027, - "technique": 6028, - "cheeks": 6029, - "##ola": 6030, - "warren": 6031, - "maintenance": 6032, - "destroy": 6033, - "extreme": 6034, - "allied": 6035, - "120": 6036, - "appearing": 6037, - "##yn": 6038, - "fill": 6039, - "advice": 6040, - "alabama": 6041, - "qualifying": 6042, - "policies": 6043, - "cleveland": 6044, - "hat": 6045, - "battery": 6046, - "smart": 6047, - "authors": 6048, - "10th": 6049, - "soundtrack": 6050, - "acted": 6051, - "dated": 6052, - "lb": 6053, - "glance": 6054, - "equipped": 6055, - "coalition": 6056, - "funny": 6057, - "outer": 6058, - "ambassador": 6059, - "roy": 
6060, - "possibility": 6061, - "couples": 6062, - "campbell": 6063, - "dna": 6064, - "loose": 6065, - "ethan": 6066, - "supplies": 6067, - "1898": 6068, - "gonna": 6069, - "88": 6070, - "monster": 6071, - "##res": 6072, - "shake": 6073, - "agents": 6074, - "frequency": 6075, - "springs": 6076, - "dogs": 6077, - "practices": 6078, - "61": 6079, - "gang": 6080, - "plastic": 6081, - "easier": 6082, - "suggests": 6083, - "gulf": 6084, - "blade": 6085, - "exposed": 6086, - "colors": 6087, - "industries": 6088, - "markets": 6089, - "pan": 6090, - "nervous": 6091, - "electoral": 6092, - "charts": 6093, - "legislation": 6094, - "ownership": 6095, - "##idae": 6096, - "mac": 6097, - "appointment": 6098, - "shield": 6099, - "copy": 6100, - "assault": 6101, - "socialist": 6102, - "abbey": 6103, - "monument": 6104, - "license": 6105, - "throne": 6106, - "employment": 6107, - "jay": 6108, - "93": 6109, - "replacement": 6110, - "charter": 6111, - "cloud": 6112, - "powered": 6113, - "suffering": 6114, - "accounts": 6115, - "oak": 6116, - "connecticut": 6117, - "strongly": 6118, - "wright": 6119, - "colour": 6120, - "crystal": 6121, - "13th": 6122, - "context": 6123, - "welsh": 6124, - "networks": 6125, - "voiced": 6126, - "gabriel": 6127, - "jerry": 6128, - "##cing": 6129, - "forehead": 6130, - "mp": 6131, - "##ens": 6132, - "manage": 6133, - "schedule": 6134, - "totally": 6135, - "remix": 6136, - "##ii": 6137, - "forests": 6138, - "occupation": 6139, - "print": 6140, - "nicholas": 6141, - "brazilian": 6142, - "strategic": 6143, - "vampires": 6144, - "engineers": 6145, - "76": 6146, - "roots": 6147, - "seek": 6148, - "correct": 6149, - "instrumental": 6150, - "und": 6151, - "alfred": 6152, - "backed": 6153, - "hop": 6154, - "##des": 6155, - "stanley": 6156, - "robinson": 6157, - "traveled": 6158, - "wayne": 6159, - "welcome": 6160, - "austrian": 6161, - "achieve": 6162, - "67": 6163, - "exit": 6164, - "rates": 6165, - "1899": 6166, - "strip": 6167, - "whereas": 6168, - "##cs": 6169, - "sing": 6170, - "deeply": 6171, - "adventure": 6172, - "bobby": 6173, - "rick": 6174, - "jamie": 6175, - "careful": 6176, - "components": 6177, - "cap": 6178, - "useful": 6179, - "personality": 6180, - "knee": 6181, - "##shi": 6182, - "pushing": 6183, - "hosts": 6184, - "02": 6185, - "protest": 6186, - "ca": 6187, - "ottoman": 6188, - "symphony": 6189, - "##sis": 6190, - "63": 6191, - "boundary": 6192, - "1890": 6193, - "processes": 6194, - "considering": 6195, - "considerable": 6196, - "tons": 6197, - "##work": 6198, - "##ft": 6199, - "##nia": 6200, - "cooper": 6201, - "trading": 6202, - "dear": 6203, - "conduct": 6204, - "91": 6205, - "illegal": 6206, - "apple": 6207, - "revolutionary": 6208, - "holiday": 6209, - "definition": 6210, - "harder": 6211, - "##van": 6212, - "jacob": 6213, - "circumstances": 6214, - "destruction": 6215, - "##lle": 6216, - "popularity": 6217, - "grip": 6218, - "classified": 6219, - "liverpool": 6220, - "donald": 6221, - "baltimore": 6222, - "flows": 6223, - "seeking": 6224, - "honour": 6225, - "approval": 6226, - "92": 6227, - "mechanical": 6228, - "till": 6229, - "happening": 6230, - "statue": 6231, - "critic": 6232, - "increasingly": 6233, - "immediate": 6234, - "describe": 6235, - "commerce": 6236, - "stare": 6237, - "##ster": 6238, - "indonesia": 6239, - "meat": 6240, - "rounds": 6241, - "boats": 6242, - "baker": 6243, - "orthodox": 6244, - "depression": 6245, - "formally": 6246, - "worn": 6247, - "naked": 6248, - "claire": 6249, - "muttered": 6250, - "sentence": 6251, - "11th": 6252, - 
"emily": 6253, - "document": 6254, - "77": 6255, - "criticism": 6256, - "wished": 6257, - "vessel": 6258, - "spiritual": 6259, - "bent": 6260, - "virgin": 6261, - "parker": 6262, - "minimum": 6263, - "murray": 6264, - "lunch": 6265, - "danny": 6266, - "printed": 6267, - "compilation": 6268, - "keyboards": 6269, - "false": 6270, - "blow": 6271, - "belonged": 6272, - "68": 6273, - "raising": 6274, - "78": 6275, - "cutting": 6276, - "##board": 6277, - "pittsburgh": 6278, - "##up": 6279, - "9th": 6280, - "shadows": 6281, - "81": 6282, - "hated": 6283, - "indigenous": 6284, - "jon": 6285, - "15th": 6286, - "barry": 6287, - "scholar": 6288, - "ah": 6289, - "##zer": 6290, - "oliver": 6291, - "##gy": 6292, - "stick": 6293, - "susan": 6294, - "meetings": 6295, - "attracted": 6296, - "spell": 6297, - "romantic": 6298, - "##ver": 6299, - "ye": 6300, - "1895": 6301, - "photo": 6302, - "demanded": 6303, - "customers": 6304, - "##ac": 6305, - "1896": 6306, - "logan": 6307, - "revival": 6308, - "keys": 6309, - "modified": 6310, - "commanded": 6311, - "jeans": 6312, - "##ious": 6313, - "upset": 6314, - "raw": 6315, - "phil": 6316, - "detective": 6317, - "hiding": 6318, - "resident": 6319, - "vincent": 6320, - "##bly": 6321, - "experiences": 6322, - "diamond": 6323, - "defeating": 6324, - "coverage": 6325, - "lucas": 6326, - "external": 6327, - "parks": 6328, - "franchise": 6329, - "helen": 6330, - "bible": 6331, - "successor": 6332, - "percussion": 6333, - "celebrated": 6334, - "il": 6335, - "lift": 6336, - "profile": 6337, - "clan": 6338, - "romania": 6339, - "##ied": 6340, - "mills": 6341, - "##su": 6342, - "nobody": 6343, - "achievement": 6344, - "shrugged": 6345, - "fault": 6346, - "1897": 6347, - "rhythm": 6348, - "initiative": 6349, - "breakfast": 6350, - "carbon": 6351, - "700": 6352, - "69": 6353, - "lasted": 6354, - "violent": 6355, - "74": 6356, - "wound": 6357, - "ken": 6358, - "killer": 6359, - "gradually": 6360, - "filmed": 6361, - "°c": 6362, - "dollars": 6363, - "processing": 6364, - "94": 6365, - "remove": 6366, - "criticized": 6367, - "guests": 6368, - "sang": 6369, - "chemistry": 6370, - "##vin": 6371, - "legislature": 6372, - "disney": 6373, - "##bridge": 6374, - "uniform": 6375, - "escaped": 6376, - "integrated": 6377, - "proposal": 6378, - "purple": 6379, - "denied": 6380, - "liquid": 6381, - "karl": 6382, - "influential": 6383, - "morris": 6384, - "nights": 6385, - "stones": 6386, - "intense": 6387, - "experimental": 6388, - "twisted": 6389, - "71": 6390, - "84": 6391, - "##ld": 6392, - "pace": 6393, - "nazi": 6394, - "mitchell": 6395, - "ny": 6396, - "blind": 6397, - "reporter": 6398, - "newspapers": 6399, - "14th": 6400, - "centers": 6401, - "burn": 6402, - "basin": 6403, - "forgotten": 6404, - "surviving": 6405, - "filed": 6406, - "collections": 6407, - "monastery": 6408, - "losses": 6409, - "manual": 6410, - "couch": 6411, - "description": 6412, - "appropriate": 6413, - "merely": 6414, - "tag": 6415, - "missions": 6416, - "sebastian": 6417, - "restoration": 6418, - "replacing": 6419, - "triple": 6420, - "73": 6421, - "elder": 6422, - "julia": 6423, - "warriors": 6424, - "benjamin": 6425, - "julian": 6426, - "convinced": 6427, - "stronger": 6428, - "amazing": 6429, - "declined": 6430, - "versus": 6431, - "merchant": 6432, - "happens": 6433, - "output": 6434, - "finland": 6435, - "bare": 6436, - "barbara": 6437, - "absence": 6438, - "ignored": 6439, - "dawn": 6440, - "injuries": 6441, - "##port": 6442, - "producers": 6443, - "##ram": 6444, - "82": 6445, - "luis": 6446, - "##ities": 
6447, - "kw": 6448, - "admit": 6449, - "expensive": 6450, - "electricity": 6451, - "nba": 6452, - "exception": 6453, - "symbol": 6454, - "##ving": 6455, - "ladies": 6456, - "shower": 6457, - "sheriff": 6458, - "characteristics": 6459, - "##je": 6460, - "aimed": 6461, - "button": 6462, - "ratio": 6463, - "effectively": 6464, - "summit": 6465, - "angle": 6466, - "jury": 6467, - "bears": 6468, - "foster": 6469, - "vessels": 6470, - "pants": 6471, - "executed": 6472, - "evans": 6473, - "dozen": 6474, - "advertising": 6475, - "kicked": 6476, - "patrol": 6477, - "1889": 6478, - "competitions": 6479, - "lifetime": 6480, - "principles": 6481, - "athletics": 6482, - "##logy": 6483, - "birmingham": 6484, - "sponsored": 6485, - "89": 6486, - "rob": 6487, - "nomination": 6488, - "1893": 6489, - "acoustic": 6490, - "##sm": 6491, - "creature": 6492, - "longest": 6493, - "##tra": 6494, - "credits": 6495, - "harbor": 6496, - "dust": 6497, - "josh": 6498, - "##so": 6499, - "territories": 6500, - "milk": 6501, - "infrastructure": 6502, - "completion": 6503, - "thailand": 6504, - "indians": 6505, - "leon": 6506, - "archbishop": 6507, - "##sy": 6508, - "assist": 6509, - "pitch": 6510, - "blake": 6511, - "arrangement": 6512, - "girlfriend": 6513, - "serbian": 6514, - "operational": 6515, - "hence": 6516, - "sad": 6517, - "scent": 6518, - "fur": 6519, - "dj": 6520, - "sessions": 6521, - "hp": 6522, - "refer": 6523, - "rarely": 6524, - "##ora": 6525, - "exists": 6526, - "1892": 6527, - "##ten": 6528, - "scientists": 6529, - "dirty": 6530, - "penalty": 6531, - "burst": 6532, - "portrait": 6533, - "seed": 6534, - "79": 6535, - "pole": 6536, - "limits": 6537, - "rival": 6538, - "1894": 6539, - "stable": 6540, - "alpha": 6541, - "grave": 6542, - "constitutional": 6543, - "alcohol": 6544, - "arrest": 6545, - "flower": 6546, - "mystery": 6547, - "devil": 6548, - "architectural": 6549, - "relationships": 6550, - "greatly": 6551, - "habitat": 6552, - "##istic": 6553, - "larry": 6554, - "progressive": 6555, - "remote": 6556, - "cotton": 6557, - "##ics": 6558, - "##ok": 6559, - "preserved": 6560, - "reaches": 6561, - "##ming": 6562, - "cited": 6563, - "86": 6564, - "vast": 6565, - "scholarship": 6566, - "decisions": 6567, - "cbs": 6568, - "joy": 6569, - "teach": 6570, - "1885": 6571, - "editions": 6572, - "knocked": 6573, - "eve": 6574, - "searching": 6575, - "partly": 6576, - "participation": 6577, - "gap": 6578, - "animated": 6579, - "fate": 6580, - "excellent": 6581, - "##ett": 6582, - "na": 6583, - "87": 6584, - "alternate": 6585, - "saints": 6586, - "youngest": 6587, - "##ily": 6588, - "climbed": 6589, - "##ita": 6590, - "##tors": 6591, - "suggest": 6592, - "##ct": 6593, - "discussion": 6594, - "staying": 6595, - "choir": 6596, - "lakes": 6597, - "jacket": 6598, - "revenue": 6599, - "nevertheless": 6600, - "peaked": 6601, - "instrument": 6602, - "wondering": 6603, - "annually": 6604, - "managing": 6605, - "neil": 6606, - "1891": 6607, - "signing": 6608, - "terry": 6609, - "##ice": 6610, - "apply": 6611, - "clinical": 6612, - "brooklyn": 6613, - "aim": 6614, - "catherine": 6615, - "fuck": 6616, - "farmers": 6617, - "figured": 6618, - "ninth": 6619, - "pride": 6620, - "hugh": 6621, - "evolution": 6622, - "ordinary": 6623, - "involvement": 6624, - "comfortable": 6625, - "shouted": 6626, - "tech": 6627, - "encouraged": 6628, - "taiwan": 6629, - "representation": 6630, - "sharing": 6631, - "##lia": 6632, - "##em": 6633, - "panic": 6634, - "exact": 6635, - "cargo": 6636, - "competing": 6637, - "fat": 6638, - "cried": 6639, 
- "83": 6640, - "1920s": 6641, - "occasions": 6642, - "pa": 6643, - "cabin": 6644, - "borders": 6645, - "utah": 6646, - "marcus": 6647, - "##isation": 6648, - "badly": 6649, - "muscles": 6650, - "##ance": 6651, - "victorian": 6652, - "transition": 6653, - "warner": 6654, - "bet": 6655, - "permission": 6656, - "##rin": 6657, - "slave": 6658, - "terrible": 6659, - "similarly": 6660, - "shares": 6661, - "seth": 6662, - "uefa": 6663, - "possession": 6664, - "medals": 6665, - "benefits": 6666, - "colleges": 6667, - "lowered": 6668, - "perfectly": 6669, - "mall": 6670, - "transit": 6671, - "##ye": 6672, - "##kar": 6673, - "publisher": 6674, - "##ened": 6675, - "harrison": 6676, - "deaths": 6677, - "elevation": 6678, - "##ae": 6679, - "asleep": 6680, - "machines": 6681, - "sigh": 6682, - "ash": 6683, - "hardly": 6684, - "argument": 6685, - "occasion": 6686, - "parent": 6687, - "leo": 6688, - "decline": 6689, - "1888": 6690, - "contribution": 6691, - "##ua": 6692, - "concentration": 6693, - "1000": 6694, - "opportunities": 6695, - "hispanic": 6696, - "guardian": 6697, - "extent": 6698, - "emotions": 6699, - "hips": 6700, - "mason": 6701, - "volumes": 6702, - "bloody": 6703, - "controversy": 6704, - "diameter": 6705, - "steady": 6706, - "mistake": 6707, - "phoenix": 6708, - "identify": 6709, - "violin": 6710, - "##sk": 6711, - "departure": 6712, - "richmond": 6713, - "spin": 6714, - "funeral": 6715, - "enemies": 6716, - "1864": 6717, - "gear": 6718, - "literally": 6719, - "connor": 6720, - "random": 6721, - "sergeant": 6722, - "grab": 6723, - "confusion": 6724, - "1865": 6725, - "transmission": 6726, - "informed": 6727, - "op": 6728, - "leaning": 6729, - "sacred": 6730, - "suspended": 6731, - "thinks": 6732, - "gates": 6733, - "portland": 6734, - "luck": 6735, - "agencies": 6736, - "yours": 6737, - "hull": 6738, - "expert": 6739, - "muscle": 6740, - "layer": 6741, - "practical": 6742, - "sculpture": 6743, - "jerusalem": 6744, - "latest": 6745, - "lloyd": 6746, - "statistics": 6747, - "deeper": 6748, - "recommended": 6749, - "warrior": 6750, - "arkansas": 6751, - "mess": 6752, - "supports": 6753, - "greg": 6754, - "eagle": 6755, - "1880": 6756, - "recovered": 6757, - "rated": 6758, - "concerts": 6759, - "rushed": 6760, - "##ano": 6761, - "stops": 6762, - "eggs": 6763, - "files": 6764, - "premiere": 6765, - "keith": 6766, - "##vo": 6767, - "delhi": 6768, - "turner": 6769, - "pit": 6770, - "affair": 6771, - "belief": 6772, - "paint": 6773, - "##zing": 6774, - "mate": 6775, - "##ach": 6776, - "##ev": 6777, - "victim": 6778, - "##ology": 6779, - "withdrew": 6780, - "bonus": 6781, - "styles": 6782, - "fled": 6783, - "##ud": 6784, - "glasgow": 6785, - "technologies": 6786, - "funded": 6787, - "nbc": 6788, - "adaptation": 6789, - "##ata": 6790, - "portrayed": 6791, - "cooperation": 6792, - "supporters": 6793, - "judges": 6794, - "bernard": 6795, - "justin": 6796, - "hallway": 6797, - "ralph": 6798, - "##ick": 6799, - "graduating": 6800, - "controversial": 6801, - "distant": 6802, - "continental": 6803, - "spider": 6804, - "bite": 6805, - "##ho": 6806, - "recognize": 6807, - "intention": 6808, - "mixing": 6809, - "##ese": 6810, - "egyptian": 6811, - "bow": 6812, - "tourism": 6813, - "suppose": 6814, - "claiming": 6815, - "tiger": 6816, - "dominated": 6817, - "participants": 6818, - "vi": 6819, - "##ru": 6820, - "nurse": 6821, - "partially": 6822, - "tape": 6823, - "##rum": 6824, - "psychology": 6825, - "##rn": 6826, - "essential": 6827, - "touring": 6828, - "duo": 6829, - "voting": 6830, - "civilian": 6831, 
- "emotional": 6832, - "channels": 6833, - "##king": 6834, - "apparent": 6835, - "hebrew": 6836, - "1887": 6837, - "tommy": 6838, - "carrier": 6839, - "intersection": 6840, - "beast": 6841, - "hudson": 6842, - "##gar": 6843, - "##zo": 6844, - "lab": 6845, - "nova": 6846, - "bench": 6847, - "discuss": 6848, - "costa": 6849, - "##ered": 6850, - "detailed": 6851, - "behalf": 6852, - "drivers": 6853, - "unfortunately": 6854, - "obtain": 6855, - "##lis": 6856, - "rocky": 6857, - "##dae": 6858, - "siege": 6859, - "friendship": 6860, - "honey": 6861, - "##rian": 6862, - "1861": 6863, - "amy": 6864, - "hang": 6865, - "posted": 6866, - "governments": 6867, - "collins": 6868, - "respond": 6869, - "wildlife": 6870, - "preferred": 6871, - "operator": 6872, - "##po": 6873, - "laura": 6874, - "pregnant": 6875, - "videos": 6876, - "dennis": 6877, - "suspected": 6878, - "boots": 6879, - "instantly": 6880, - "weird": 6881, - "automatic": 6882, - "businessman": 6883, - "alleged": 6884, - "placing": 6885, - "throwing": 6886, - "ph": 6887, - "mood": 6888, - "1862": 6889, - "perry": 6890, - "venue": 6891, - "jet": 6892, - "remainder": 6893, - "##lli": 6894, - "##ci": 6895, - "passion": 6896, - "biological": 6897, - "boyfriend": 6898, - "1863": 6899, - "dirt": 6900, - "buffalo": 6901, - "ron": 6902, - "segment": 6903, - "fa": 6904, - "abuse": 6905, - "##era": 6906, - "genre": 6907, - "thrown": 6908, - "stroke": 6909, - "colored": 6910, - "stress": 6911, - "exercise": 6912, - "displayed": 6913, - "##gen": 6914, - "struggled": 6915, - "##tti": 6916, - "abroad": 6917, - "dramatic": 6918, - "wonderful": 6919, - "thereafter": 6920, - "madrid": 6921, - "component": 6922, - "widespread": 6923, - "##sed": 6924, - "tale": 6925, - "citizen": 6926, - "todd": 6927, - "monday": 6928, - "1886": 6929, - "vancouver": 6930, - "overseas": 6931, - "forcing": 6932, - "crying": 6933, - "descent": 6934, - "##ris": 6935, - "discussed": 6936, - "substantial": 6937, - "ranks": 6938, - "regime": 6939, - "1870": 6940, - "provinces": 6941, - "switch": 6942, - "drum": 6943, - "zane": 6944, - "ted": 6945, - "tribes": 6946, - "proof": 6947, - "lp": 6948, - "cream": 6949, - "researchers": 6950, - "volunteer": 6951, - "manor": 6952, - "silk": 6953, - "milan": 6954, - "donated": 6955, - "allies": 6956, - "venture": 6957, - "principle": 6958, - "delivery": 6959, - "enterprise": 6960, - "##ves": 6961, - "##ans": 6962, - "bars": 6963, - "traditionally": 6964, - "witch": 6965, - "reminded": 6966, - "copper": 6967, - "##uk": 6968, - "pete": 6969, - "inter": 6970, - "links": 6971, - "colin": 6972, - "grinned": 6973, - "elsewhere": 6974, - "competitive": 6975, - "frequent": 6976, - "##oy": 6977, - "scream": 6978, - "##hu": 6979, - "tension": 6980, - "texts": 6981, - "submarine": 6982, - "finnish": 6983, - "defending": 6984, - "defend": 6985, - "pat": 6986, - "detail": 6987, - "1884": 6988, - "affiliated": 6989, - "stuart": 6990, - "themes": 6991, - "villa": 6992, - "periods": 6993, - "tool": 6994, - "belgian": 6995, - "ruling": 6996, - "crimes": 6997, - "answers": 6998, - "folded": 6999, - "licensed": 7000, - "resort": 7001, - "demolished": 7002, - "hans": 7003, - "lucy": 7004, - "1881": 7005, - "lion": 7006, - "traded": 7007, - "photographs": 7008, - "writes": 7009, - "craig": 7010, - "##fa": 7011, - "trials": 7012, - "generated": 7013, - "beth": 7014, - "noble": 7015, - "debt": 7016, - "percentage": 7017, - "yorkshire": 7018, - "erected": 7019, - "ss": 7020, - "viewed": 7021, - "grades": 7022, - "confidence": 7023, - "ceased": 7024, - "islam": 7025, 
- "telephone": 7026, - "retail": 7027, - "##ible": 7028, - "chile": 7029, - "m²": 7030, - "roberts": 7031, - "sixteen": 7032, - "##ich": 7033, - "commented": 7034, - "hampshire": 7035, - "innocent": 7036, - "dual": 7037, - "pounds": 7038, - "checked": 7039, - "regulations": 7040, - "afghanistan": 7041, - "sung": 7042, - "rico": 7043, - "liberty": 7044, - "assets": 7045, - "bigger": 7046, - "options": 7047, - "angels": 7048, - "relegated": 7049, - "tribute": 7050, - "wells": 7051, - "attending": 7052, - "leaf": 7053, - "##yan": 7054, - "butler": 7055, - "romanian": 7056, - "forum": 7057, - "monthly": 7058, - "lisa": 7059, - "patterns": 7060, - "gmina": 7061, - "##tory": 7062, - "madison": 7063, - "hurricane": 7064, - "rev": 7065, - "##ians": 7066, - "bristol": 7067, - "##ula": 7068, - "elite": 7069, - "valuable": 7070, - "disaster": 7071, - "democracy": 7072, - "awareness": 7073, - "germans": 7074, - "freyja": 7075, - "##ins": 7076, - "loop": 7077, - "absolutely": 7078, - "paying": 7079, - "populations": 7080, - "maine": 7081, - "sole": 7082, - "prayer": 7083, - "spencer": 7084, - "releases": 7085, - "doorway": 7086, - "bull": 7087, - "##ani": 7088, - "lover": 7089, - "midnight": 7090, - "conclusion": 7091, - "##sson": 7092, - "thirteen": 7093, - "lily": 7094, - "mediterranean": 7095, - "##lt": 7096, - "nhl": 7097, - "proud": 7098, - "sample": 7099, - "##hill": 7100, - "drummer": 7101, - "guinea": 7102, - "##ova": 7103, - "murphy": 7104, - "climb": 7105, - "##ston": 7106, - "instant": 7107, - "attributed": 7108, - "horn": 7109, - "ain": 7110, - "railways": 7111, - "steven": 7112, - "##ao": 7113, - "autumn": 7114, - "ferry": 7115, - "opponent": 7116, - "root": 7117, - "traveling": 7118, - "secured": 7119, - "corridor": 7120, - "stretched": 7121, - "tales": 7122, - "sheet": 7123, - "trinity": 7124, - "cattle": 7125, - "helps": 7126, - "indicates": 7127, - "manhattan": 7128, - "murdered": 7129, - "fitted": 7130, - "1882": 7131, - "gentle": 7132, - "grandmother": 7133, - "mines": 7134, - "shocked": 7135, - "vegas": 7136, - "produces": 7137, - "##light": 7138, - "caribbean": 7139, - "##ou": 7140, - "belong": 7141, - "continuous": 7142, - "desperate": 7143, - "drunk": 7144, - "historically": 7145, - "trio": 7146, - "waved": 7147, - "raf": 7148, - "dealing": 7149, - "nathan": 7150, - "bat": 7151, - "murmured": 7152, - "interrupted": 7153, - "residing": 7154, - "scientist": 7155, - "pioneer": 7156, - "harold": 7157, - "aaron": 7158, - "##net": 7159, - "delta": 7160, - "attempting": 7161, - "minority": 7162, - "mini": 7163, - "believes": 7164, - "chorus": 7165, - "tend": 7166, - "lots": 7167, - "eyed": 7168, - "indoor": 7169, - "load": 7170, - "shots": 7171, - "updated": 7172, - "jail": 7173, - "##llo": 7174, - "concerning": 7175, - "connecting": 7176, - "wealth": 7177, - "##ved": 7178, - "slaves": 7179, - "arrive": 7180, - "rangers": 7181, - "sufficient": 7182, - "rebuilt": 7183, - "##wick": 7184, - "cardinal": 7185, - "flood": 7186, - "muhammad": 7187, - "whenever": 7188, - "relation": 7189, - "runners": 7190, - "moral": 7191, - "repair": 7192, - "viewers": 7193, - "arriving": 7194, - "revenge": 7195, - "punk": 7196, - "assisted": 7197, - "bath": 7198, - "fairly": 7199, - "breathe": 7200, - "lists": 7201, - "innings": 7202, - "illustrated": 7203, - "whisper": 7204, - "nearest": 7205, - "voters": 7206, - "clinton": 7207, - "ties": 7208, - "ultimate": 7209, - "screamed": 7210, - "beijing": 7211, - "lions": 7212, - "andre": 7213, - "fictional": 7214, - "gathering": 7215, - "comfort": 7216, - "radar": 
7217, - "suitable": 7218, - "dismissed": 7219, - "hms": 7220, - "ban": 7221, - "pine": 7222, - "wrist": 7223, - "atmosphere": 7224, - "voivodeship": 7225, - "bid": 7226, - "timber": 7227, - "##ned": 7228, - "##nan": 7229, - "giants": 7230, - "##ane": 7231, - "cameron": 7232, - "recovery": 7233, - "uss": 7234, - "identical": 7235, - "categories": 7236, - "switched": 7237, - "serbia": 7238, - "laughter": 7239, - "noah": 7240, - "ensemble": 7241, - "therapy": 7242, - "peoples": 7243, - "touching": 7244, - "##off": 7245, - "locally": 7246, - "pearl": 7247, - "platforms": 7248, - "everywhere": 7249, - "ballet": 7250, - "tables": 7251, - "lanka": 7252, - "herbert": 7253, - "outdoor": 7254, - "toured": 7255, - "derek": 7256, - "1883": 7257, - "spaces": 7258, - "contested": 7259, - "swept": 7260, - "1878": 7261, - "exclusive": 7262, - "slight": 7263, - "connections": 7264, - "##dra": 7265, - "winds": 7266, - "prisoner": 7267, - "collective": 7268, - "bangladesh": 7269, - "tube": 7270, - "publicly": 7271, - "wealthy": 7272, - "thai": 7273, - "##ys": 7274, - "isolated": 7275, - "select": 7276, - "##ric": 7277, - "insisted": 7278, - "pen": 7279, - "fortune": 7280, - "ticket": 7281, - "spotted": 7282, - "reportedly": 7283, - "animation": 7284, - "enforcement": 7285, - "tanks": 7286, - "110": 7287, - "decides": 7288, - "wider": 7289, - "lowest": 7290, - "owen": 7291, - "##time": 7292, - "nod": 7293, - "hitting": 7294, - "##hn": 7295, - "gregory": 7296, - "furthermore": 7297, - "magazines": 7298, - "fighters": 7299, - "solutions": 7300, - "##ery": 7301, - "pointing": 7302, - "requested": 7303, - "peru": 7304, - "reed": 7305, - "chancellor": 7306, - "knights": 7307, - "mask": 7308, - "worker": 7309, - "eldest": 7310, - "flames": 7311, - "reduction": 7312, - "1860": 7313, - "volunteers": 7314, - "##tis": 7315, - "reporting": 7316, - "##hl": 7317, - "wire": 7318, - "advisory": 7319, - "endemic": 7320, - "origins": 7321, - "settlers": 7322, - "pursue": 7323, - "knock": 7324, - "consumer": 7325, - "1876": 7326, - "eu": 7327, - "compound": 7328, - "creatures": 7329, - "mansion": 7330, - "sentenced": 7331, - "ivan": 7332, - "deployed": 7333, - "guitars": 7334, - "frowned": 7335, - "involves": 7336, - "mechanism": 7337, - "kilometers": 7338, - "perspective": 7339, - "shops": 7340, - "maps": 7341, - "terminus": 7342, - "duncan": 7343, - "alien": 7344, - "fist": 7345, - "bridges": 7346, - "##pers": 7347, - "heroes": 7348, - "fed": 7349, - "derby": 7350, - "swallowed": 7351, - "##ros": 7352, - "patent": 7353, - "sara": 7354, - "illness": 7355, - "characterized": 7356, - "adventures": 7357, - "slide": 7358, - "hawaii": 7359, - "jurisdiction": 7360, - "##op": 7361, - "organised": 7362, - "##side": 7363, - "adelaide": 7364, - "walks": 7365, - "biology": 7366, - "se": 7367, - "##ties": 7368, - "rogers": 7369, - "swing": 7370, - "tightly": 7371, - "boundaries": 7372, - "##rie": 7373, - "prepare": 7374, - "implementation": 7375, - "stolen": 7376, - "##sha": 7377, - "certified": 7378, - "colombia": 7379, - "edwards": 7380, - "garage": 7381, - "##mm": 7382, - "recalled": 7383, - "##ball": 7384, - "rage": 7385, - "harm": 7386, - "nigeria": 7387, - "breast": 7388, - "##ren": 7389, - "furniture": 7390, - "pupils": 7391, - "settle": 7392, - "##lus": 7393, - "cuba": 7394, - "balls": 7395, - "client": 7396, - "alaska": 7397, - "21st": 7398, - "linear": 7399, - "thrust": 7400, - "celebration": 7401, - "latino": 7402, - "genetic": 7403, - "terror": 7404, - "##cia": 7405, - "##ening": 7406, - "lightning": 7407, - "fee": 7408, - 
"witness": 7409, - "lodge": 7410, - "establishing": 7411, - "skull": 7412, - "##ique": 7413, - "earning": 7414, - "hood": 7415, - "##ei": 7416, - "rebellion": 7417, - "wang": 7418, - "sporting": 7419, - "warned": 7420, - "missile": 7421, - "devoted": 7422, - "activist": 7423, - "porch": 7424, - "worship": 7425, - "fourteen": 7426, - "package": 7427, - "1871": 7428, - "decorated": 7429, - "##shire": 7430, - "housed": 7431, - "##ock": 7432, - "chess": 7433, - "sailed": 7434, - "doctors": 7435, - "oscar": 7436, - "joan": 7437, - "treat": 7438, - "garcia": 7439, - "harbour": 7440, - "jeremy": 7441, - "##ire": 7442, - "traditions": 7443, - "dominant": 7444, - "jacques": 7445, - "##gon": 7446, - "##wan": 7447, - "relocated": 7448, - "1879": 7449, - "amendment": 7450, - "sized": 7451, - "companion": 7452, - "simultaneously": 7453, - "volleyball": 7454, - "spun": 7455, - "acre": 7456, - "increases": 7457, - "stopping": 7458, - "loves": 7459, - "belongs": 7460, - "affect": 7461, - "drafted": 7462, - "tossed": 7463, - "scout": 7464, - "battles": 7465, - "1875": 7466, - "filming": 7467, - "shoved": 7468, - "munich": 7469, - "tenure": 7470, - "vertical": 7471, - "romance": 7472, - "pc": 7473, - "##cher": 7474, - "argue": 7475, - "##ical": 7476, - "craft": 7477, - "ranging": 7478, - "www": 7479, - "opens": 7480, - "honest": 7481, - "tyler": 7482, - "yesterday": 7483, - "virtual": 7484, - "##let": 7485, - "muslims": 7486, - "reveal": 7487, - "snake": 7488, - "immigrants": 7489, - "radical": 7490, - "screaming": 7491, - "speakers": 7492, - "firing": 7493, - "saving": 7494, - "belonging": 7495, - "ease": 7496, - "lighting": 7497, - "prefecture": 7498, - "blame": 7499, - "farmer": 7500, - "hungry": 7501, - "grows": 7502, - "rubbed": 7503, - "beam": 7504, - "sur": 7505, - "subsidiary": 7506, - "##cha": 7507, - "armenian": 7508, - "sao": 7509, - "dropping": 7510, - "conventional": 7511, - "##fer": 7512, - "microsoft": 7513, - "reply": 7514, - "qualify": 7515, - "spots": 7516, - "1867": 7517, - "sweat": 7518, - "festivals": 7519, - "##ken": 7520, - "immigration": 7521, - "physician": 7522, - "discover": 7523, - "exposure": 7524, - "sandy": 7525, - "explanation": 7526, - "isaac": 7527, - "implemented": 7528, - "##fish": 7529, - "hart": 7530, - "initiated": 7531, - "connect": 7532, - "stakes": 7533, - "presents": 7534, - "heights": 7535, - "householder": 7536, - "pleased": 7537, - "tourist": 7538, - "regardless": 7539, - "slip": 7540, - "closest": 7541, - "##ction": 7542, - "surely": 7543, - "sultan": 7544, - "brings": 7545, - "riley": 7546, - "preparation": 7547, - "aboard": 7548, - "slammed": 7549, - "baptist": 7550, - "experiment": 7551, - "ongoing": 7552, - "interstate": 7553, - "organic": 7554, - "playoffs": 7555, - "##ika": 7556, - "1877": 7557, - "130": 7558, - "##tar": 7559, - "hindu": 7560, - "error": 7561, - "tours": 7562, - "tier": 7563, - "plenty": 7564, - "arrangements": 7565, - "talks": 7566, - "trapped": 7567, - "excited": 7568, - "sank": 7569, - "ho": 7570, - "athens": 7571, - "1872": 7572, - "denver": 7573, - "welfare": 7574, - "suburb": 7575, - "athletes": 7576, - "trick": 7577, - "diverse": 7578, - "belly": 7579, - "exclusively": 7580, - "yelled": 7581, - "1868": 7582, - "##med": 7583, - "conversion": 7584, - "##ette": 7585, - "1874": 7586, - "internationally": 7587, - "computers": 7588, - "conductor": 7589, - "abilities": 7590, - "sensitive": 7591, - "hello": 7592, - "dispute": 7593, - "measured": 7594, - "globe": 7595, - "rocket": 7596, - "prices": 7597, - "amsterdam": 7598, - "flights": 
7599, - "tigers": 7600, - "inn": 7601, - "municipalities": 7602, - "emotion": 7603, - "references": 7604, - "3d": 7605, - "##mus": 7606, - "explains": 7607, - "airlines": 7608, - "manufactured": 7609, - "pm": 7610, - "archaeological": 7611, - "1873": 7612, - "interpretation": 7613, - "devon": 7614, - "comment": 7615, - "##ites": 7616, - "settlements": 7617, - "kissing": 7618, - "absolute": 7619, - "improvement": 7620, - "suite": 7621, - "impressed": 7622, - "barcelona": 7623, - "sullivan": 7624, - "jefferson": 7625, - "towers": 7626, - "jesse": 7627, - "julie": 7628, - "##tin": 7629, - "##lu": 7630, - "grandson": 7631, - "hi": 7632, - "gauge": 7633, - "regard": 7634, - "rings": 7635, - "interviews": 7636, - "trace": 7637, - "raymond": 7638, - "thumb": 7639, - "departments": 7640, - "burns": 7641, - "serial": 7642, - "bulgarian": 7643, - "scores": 7644, - "demonstrated": 7645, - "##ix": 7646, - "1866": 7647, - "kyle": 7648, - "alberta": 7649, - "underneath": 7650, - "romanized": 7651, - "##ward": 7652, - "relieved": 7653, - "acquisition": 7654, - "phrase": 7655, - "cliff": 7656, - "reveals": 7657, - "han": 7658, - "cuts": 7659, - "merger": 7660, - "custom": 7661, - "##dar": 7662, - "nee": 7663, - "gilbert": 7664, - "graduation": 7665, - "##nts": 7666, - "assessment": 7667, - "cafe": 7668, - "difficulty": 7669, - "demands": 7670, - "swung": 7671, - "democrat": 7672, - "jennifer": 7673, - "commons": 7674, - "1940s": 7675, - "grove": 7676, - "##yo": 7677, - "completing": 7678, - "focuses": 7679, - "sum": 7680, - "substitute": 7681, - "bearing": 7682, - "stretch": 7683, - "reception": 7684, - "##py": 7685, - "reflected": 7686, - "essentially": 7687, - "destination": 7688, - "pairs": 7689, - "##ched": 7690, - "survival": 7691, - "resource": 7692, - "##bach": 7693, - "promoting": 7694, - "doubles": 7695, - "messages": 7696, - "tear": 7697, - "##down": 7698, - "##fully": 7699, - "parade": 7700, - "florence": 7701, - "harvey": 7702, - "incumbent": 7703, - "partial": 7704, - "framework": 7705, - "900": 7706, - "pedro": 7707, - "frozen": 7708, - "procedure": 7709, - "olivia": 7710, - "controls": 7711, - "##mic": 7712, - "shelter": 7713, - "personally": 7714, - "temperatures": 7715, - "##od": 7716, - "brisbane": 7717, - "tested": 7718, - "sits": 7719, - "marble": 7720, - "comprehensive": 7721, - "oxygen": 7722, - "leonard": 7723, - "##kov": 7724, - "inaugural": 7725, - "iranian": 7726, - "referring": 7727, - "quarters": 7728, - "attitude": 7729, - "##ivity": 7730, - "mainstream": 7731, - "lined": 7732, - "mars": 7733, - "dakota": 7734, - "norfolk": 7735, - "unsuccessful": 7736, - "##°": 7737, - "explosion": 7738, - "helicopter": 7739, - "congressional": 7740, - "##sing": 7741, - "inspector": 7742, - "bitch": 7743, - "seal": 7744, - "departed": 7745, - "divine": 7746, - "##ters": 7747, - "coaching": 7748, - "examination": 7749, - "punishment": 7750, - "manufacturer": 7751, - "sink": 7752, - "columns": 7753, - "unincorporated": 7754, - "signals": 7755, - "nevada": 7756, - "squeezed": 7757, - "dylan": 7758, - "dining": 7759, - "photos": 7760, - "martial": 7761, - "manuel": 7762, - "eighteen": 7763, - "elevator": 7764, - "brushed": 7765, - "plates": 7766, - "ministers": 7767, - "ivy": 7768, - "congregation": 7769, - "##len": 7770, - "slept": 7771, - "specialized": 7772, - "taxes": 7773, - "curve": 7774, - "restricted": 7775, - "negotiations": 7776, - "likes": 7777, - "statistical": 7778, - "arnold": 7779, - "inspiration": 7780, - "execution": 7781, - "bold": 7782, - "intermediate": 7783, - "significance": 
7784, - "margin": 7785, - "ruler": 7786, - "wheels": 7787, - "gothic": 7788, - "intellectual": 7789, - "dependent": 7790, - "listened": 7791, - "eligible": 7792, - "buses": 7793, - "widow": 7794, - "syria": 7795, - "earn": 7796, - "cincinnati": 7797, - "collapsed": 7798, - "recipient": 7799, - "secrets": 7800, - "accessible": 7801, - "philippine": 7802, - "maritime": 7803, - "goddess": 7804, - "clerk": 7805, - "surrender": 7806, - "breaks": 7807, - "playoff": 7808, - "database": 7809, - "##ified": 7810, - "##lon": 7811, - "ideal": 7812, - "beetle": 7813, - "aspect": 7814, - "soap": 7815, - "regulation": 7816, - "strings": 7817, - "expand": 7818, - "anglo": 7819, - "shorter": 7820, - "crosses": 7821, - "retreat": 7822, - "tough": 7823, - "coins": 7824, - "wallace": 7825, - "directions": 7826, - "pressing": 7827, - "##oon": 7828, - "shipping": 7829, - "locomotives": 7830, - "comparison": 7831, - "topics": 7832, - "nephew": 7833, - "##mes": 7834, - "distinction": 7835, - "honors": 7836, - "travelled": 7837, - "sierra": 7838, - "ibn": 7839, - "##over": 7840, - "fortress": 7841, - "sa": 7842, - "recognised": 7843, - "carved": 7844, - "1869": 7845, - "clients": 7846, - "##dan": 7847, - "intent": 7848, - "##mar": 7849, - "coaches": 7850, - "describing": 7851, - "bread": 7852, - "##ington": 7853, - "beaten": 7854, - "northwestern": 7855, - "##ona": 7856, - "merit": 7857, - "youtube": 7858, - "collapse": 7859, - "challenges": 7860, - "em": 7861, - "historians": 7862, - "objective": 7863, - "submitted": 7864, - "virus": 7865, - "attacking": 7866, - "drake": 7867, - "assume": 7868, - "##ere": 7869, - "diseases": 7870, - "marc": 7871, - "stem": 7872, - "leeds": 7873, - "##cus": 7874, - "##ab": 7875, - "farming": 7876, - "glasses": 7877, - "##lock": 7878, - "visits": 7879, - "nowhere": 7880, - "fellowship": 7881, - "relevant": 7882, - "carries": 7883, - "restaurants": 7884, - "experiments": 7885, - "101": 7886, - "constantly": 7887, - "bases": 7888, - "targets": 7889, - "shah": 7890, - "tenth": 7891, - "opponents": 7892, - "verse": 7893, - "territorial": 7894, - "##ira": 7895, - "writings": 7896, - "corruption": 7897, - "##hs": 7898, - "instruction": 7899, - "inherited": 7900, - "reverse": 7901, - "emphasis": 7902, - "##vic": 7903, - "employee": 7904, - "arch": 7905, - "keeps": 7906, - "rabbi": 7907, - "watson": 7908, - "payment": 7909, - "uh": 7910, - "##ala": 7911, - "nancy": 7912, - "##tre": 7913, - "venice": 7914, - "fastest": 7915, - "sexy": 7916, - "banned": 7917, - "adrian": 7918, - "properly": 7919, - "ruth": 7920, - "touchdown": 7921, - "dollar": 7922, - "boards": 7923, - "metre": 7924, - "circles": 7925, - "edges": 7926, - "favour": 7927, - "comments": 7928, - "ok": 7929, - "travels": 7930, - "liberation": 7931, - "scattered": 7932, - "firmly": 7933, - "##ular": 7934, - "holland": 7935, - "permitted": 7936, - "diesel": 7937, - "kenya": 7938, - "den": 7939, - "originated": 7940, - "##ral": 7941, - "demons": 7942, - "resumed": 7943, - "dragged": 7944, - "rider": 7945, - "##rus": 7946, - "servant": 7947, - "blinked": 7948, - "extend": 7949, - "torn": 7950, - "##ias": 7951, - "##sey": 7952, - "input": 7953, - "meal": 7954, - "everybody": 7955, - "cylinder": 7956, - "kinds": 7957, - "camps": 7958, - "##fe": 7959, - "bullet": 7960, - "logic": 7961, - "##wn": 7962, - "croatian": 7963, - "evolved": 7964, - "healthy": 7965, - "fool": 7966, - "chocolate": 7967, - "wise": 7968, - "preserve": 7969, - "pradesh": 7970, - "##ess": 7971, - "respective": 7972, - "1850": 7973, - "##ew": 7974, - "chicken": 
7975, - "artificial": 7976, - "gross": 7977, - "corresponding": 7978, - "convicted": 7979, - "cage": 7980, - "caroline": 7981, - "dialogue": 7982, - "##dor": 7983, - "narrative": 7984, - "stranger": 7985, - "mario": 7986, - "br": 7987, - "christianity": 7988, - "failing": 7989, - "trent": 7990, - "commanding": 7991, - "buddhist": 7992, - "1848": 7993, - "maurice": 7994, - "focusing": 7995, - "yale": 7996, - "bike": 7997, - "altitude": 7998, - "##ering": 7999, - "mouse": 8000, - "revised": 8001, - "##sley": 8002, - "veteran": 8003, - "##ig": 8004, - "pulls": 8005, - "theology": 8006, - "crashed": 8007, - "campaigns": 8008, - "legion": 8009, - "##ability": 8010, - "drag": 8011, - "excellence": 8012, - "customer": 8013, - "cancelled": 8014, - "intensity": 8015, - "excuse": 8016, - "##lar": 8017, - "liga": 8018, - "participating": 8019, - "contributing": 8020, - "printing": 8021, - "##burn": 8022, - "variable": 8023, - "##rk": 8024, - "curious": 8025, - "bin": 8026, - "legacy": 8027, - "renaissance": 8028, - "##my": 8029, - "symptoms": 8030, - "binding": 8031, - "vocalist": 8032, - "dancer": 8033, - "##nie": 8034, - "grammar": 8035, - "gospel": 8036, - "democrats": 8037, - "ya": 8038, - "enters": 8039, - "sc": 8040, - "diplomatic": 8041, - "hitler": 8042, - "##ser": 8043, - "clouds": 8044, - "mathematical": 8045, - "quit": 8046, - "defended": 8047, - "oriented": 8048, - "##heim": 8049, - "fundamental": 8050, - "hardware": 8051, - "impressive": 8052, - "equally": 8053, - "convince": 8054, - "confederate": 8055, - "guilt": 8056, - "chuck": 8057, - "sliding": 8058, - "##ware": 8059, - "magnetic": 8060, - "narrowed": 8061, - "petersburg": 8062, - "bulgaria": 8063, - "otto": 8064, - "phd": 8065, - "skill": 8066, - "##ama": 8067, - "reader": 8068, - "hopes": 8069, - "pitcher": 8070, - "reservoir": 8071, - "hearts": 8072, - "automatically": 8073, - "expecting": 8074, - "mysterious": 8075, - "bennett": 8076, - "extensively": 8077, - "imagined": 8078, - "seeds": 8079, - "monitor": 8080, - "fix": 8081, - "##ative": 8082, - "journalism": 8083, - "struggling": 8084, - "signature": 8085, - "ranch": 8086, - "encounter": 8087, - "photographer": 8088, - "observation": 8089, - "protests": 8090, - "##pin": 8091, - "influences": 8092, - "##hr": 8093, - "calendar": 8094, - "##all": 8095, - "cruz": 8096, - "croatia": 8097, - "locomotive": 8098, - "hughes": 8099, - "naturally": 8100, - "shakespeare": 8101, - "basement": 8102, - "hook": 8103, - "uncredited": 8104, - "faded": 8105, - "theories": 8106, - "approaches": 8107, - "dare": 8108, - "phillips": 8109, - "filling": 8110, - "fury": 8111, - "obama": 8112, - "##ain": 8113, - "efficient": 8114, - "arc": 8115, - "deliver": 8116, - "min": 8117, - "raid": 8118, - "breeding": 8119, - "inducted": 8120, - "leagues": 8121, - "efficiency": 8122, - "axis": 8123, - "montana": 8124, - "eagles": 8125, - "##ked": 8126, - "supplied": 8127, - "instructions": 8128, - "karen": 8129, - "picking": 8130, - "indicating": 8131, - "trap": 8132, - "anchor": 8133, - "practically": 8134, - "christians": 8135, - "tomb": 8136, - "vary": 8137, - "occasional": 8138, - "electronics": 8139, - "lords": 8140, - "readers": 8141, - "newcastle": 8142, - "faint": 8143, - "innovation": 8144, - "collect": 8145, - "situations": 8146, - "engagement": 8147, - "160": 8148, - "claude": 8149, - "mixture": 8150, - "##feld": 8151, - "peer": 8152, - "tissue": 8153, - "logo": 8154, - "lean": 8155, - "##ration": 8156, - "°f": 8157, - "floors": 8158, - "##ven": 8159, - "architects": 8160, - "reducing": 8161, - 
"##our": 8162, - "##ments": 8163, - "rope": 8164, - "1859": 8165, - "ottawa": 8166, - "##har": 8167, - "samples": 8168, - "banking": 8169, - "declaration": 8170, - "proteins": 8171, - "resignation": 8172, - "francois": 8173, - "saudi": 8174, - "advocate": 8175, - "exhibited": 8176, - "armor": 8177, - "twins": 8178, - "divorce": 8179, - "##ras": 8180, - "abraham": 8181, - "reviewed": 8182, - "jo": 8183, - "temporarily": 8184, - "matrix": 8185, - "physically": 8186, - "pulse": 8187, - "curled": 8188, - "##ena": 8189, - "difficulties": 8190, - "bengal": 8191, - "usage": 8192, - "##ban": 8193, - "annie": 8194, - "riders": 8195, - "certificate": 8196, - "##pi": 8197, - "holes": 8198, - "warsaw": 8199, - "distinctive": 8200, - "jessica": 8201, - "##mon": 8202, - "mutual": 8203, - "1857": 8204, - "customs": 8205, - "circular": 8206, - "eugene": 8207, - "removal": 8208, - "loaded": 8209, - "mere": 8210, - "vulnerable": 8211, - "depicted": 8212, - "generations": 8213, - "dame": 8214, - "heir": 8215, - "enormous": 8216, - "lightly": 8217, - "climbing": 8218, - "pitched": 8219, - "lessons": 8220, - "pilots": 8221, - "nepal": 8222, - "ram": 8223, - "google": 8224, - "preparing": 8225, - "brad": 8226, - "louise": 8227, - "renowned": 8228, - "##₂": 8229, - "liam": 8230, - "##ably": 8231, - "plaza": 8232, - "shaw": 8233, - "sophie": 8234, - "brilliant": 8235, - "bills": 8236, - "##bar": 8237, - "##nik": 8238, - "fucking": 8239, - "mainland": 8240, - "server": 8241, - "pleasant": 8242, - "seized": 8243, - "veterans": 8244, - "jerked": 8245, - "fail": 8246, - "beta": 8247, - "brush": 8248, - "radiation": 8249, - "stored": 8250, - "warmth": 8251, - "southeastern": 8252, - "nate": 8253, - "sin": 8254, - "raced": 8255, - "berkeley": 8256, - "joke": 8257, - "athlete": 8258, - "designation": 8259, - "trunk": 8260, - "##low": 8261, - "roland": 8262, - "qualification": 8263, - "archives": 8264, - "heels": 8265, - "artwork": 8266, - "receives": 8267, - "judicial": 8268, - "reserves": 8269, - "##bed": 8270, - "woke": 8271, - "installation": 8272, - "abu": 8273, - "floating": 8274, - "fake": 8275, - "lesser": 8276, - "excitement": 8277, - "interface": 8278, - "concentrated": 8279, - "addressed": 8280, - "characteristic": 8281, - "amanda": 8282, - "saxophone": 8283, - "monk": 8284, - "auto": 8285, - "##bus": 8286, - "releasing": 8287, - "egg": 8288, - "dies": 8289, - "interaction": 8290, - "defender": 8291, - "ce": 8292, - "outbreak": 8293, - "glory": 8294, - "loving": 8295, - "##bert": 8296, - "sequel": 8297, - "consciousness": 8298, - "http": 8299, - "awake": 8300, - "ski": 8301, - "enrolled": 8302, - "##ress": 8303, - "handling": 8304, - "rookie": 8305, - "brow": 8306, - "somebody": 8307, - "biography": 8308, - "warfare": 8309, - "amounts": 8310, - "contracts": 8311, - "presentation": 8312, - "fabric": 8313, - "dissolved": 8314, - "challenged": 8315, - "meter": 8316, - "psychological": 8317, - "lt": 8318, - "elevated": 8319, - "rally": 8320, - "accurate": 8321, - "##tha": 8322, - "hospitals": 8323, - "undergraduate": 8324, - "specialist": 8325, - "venezuela": 8326, - "exhibit": 8327, - "shed": 8328, - "nursing": 8329, - "protestant": 8330, - "fluid": 8331, - "structural": 8332, - "footage": 8333, - "jared": 8334, - "consistent": 8335, - "prey": 8336, - "##ska": 8337, - "succession": 8338, - "reflect": 8339, - "exile": 8340, - "lebanon": 8341, - "wiped": 8342, - "suspect": 8343, - "shanghai": 8344, - "resting": 8345, - "integration": 8346, - "preservation": 8347, - "marvel": 8348, - "variant": 8349, - "pirates": 
8350, - "sheep": 8351, - "rounded": 8352, - "capita": 8353, - "sailing": 8354, - "colonies": 8355, - "manuscript": 8356, - "deemed": 8357, - "variations": 8358, - "clarke": 8359, - "functional": 8360, - "emerging": 8361, - "boxing": 8362, - "relaxed": 8363, - "curse": 8364, - "azerbaijan": 8365, - "heavyweight": 8366, - "nickname": 8367, - "editorial": 8368, - "rang": 8369, - "grid": 8370, - "tightened": 8371, - "earthquake": 8372, - "flashed": 8373, - "miguel": 8374, - "rushing": 8375, - "##ches": 8376, - "improvements": 8377, - "boxes": 8378, - "brooks": 8379, - "180": 8380, - "consumption": 8381, - "molecular": 8382, - "felix": 8383, - "societies": 8384, - "repeatedly": 8385, - "variation": 8386, - "aids": 8387, - "civic": 8388, - "graphics": 8389, - "professionals": 8390, - "realm": 8391, - "autonomous": 8392, - "receiver": 8393, - "delayed": 8394, - "workshop": 8395, - "militia": 8396, - "chairs": 8397, - "trump": 8398, - "canyon": 8399, - "##point": 8400, - "harsh": 8401, - "extending": 8402, - "lovely": 8403, - "happiness": 8404, - "##jan": 8405, - "stake": 8406, - "eyebrows": 8407, - "embassy": 8408, - "wellington": 8409, - "hannah": 8410, - "##ella": 8411, - "sony": 8412, - "corners": 8413, - "bishops": 8414, - "swear": 8415, - "cloth": 8416, - "contents": 8417, - "xi": 8418, - "namely": 8419, - "commenced": 8420, - "1854": 8421, - "stanford": 8422, - "nashville": 8423, - "courage": 8424, - "graphic": 8425, - "commitment": 8426, - "garrison": 8427, - "##bin": 8428, - "hamlet": 8429, - "clearing": 8430, - "rebels": 8431, - "attraction": 8432, - "literacy": 8433, - "cooking": 8434, - "ruins": 8435, - "temples": 8436, - "jenny": 8437, - "humanity": 8438, - "celebrate": 8439, - "hasn": 8440, - "freight": 8441, - "sixty": 8442, - "rebel": 8443, - "bastard": 8444, - "##art": 8445, - "newton": 8446, - "##ada": 8447, - "deer": 8448, - "##ges": 8449, - "##ching": 8450, - "smiles": 8451, - "delaware": 8452, - "singers": 8453, - "##ets": 8454, - "approaching": 8455, - "assists": 8456, - "flame": 8457, - "##ph": 8458, - "boulevard": 8459, - "barrel": 8460, - "planted": 8461, - "##ome": 8462, - "pursuit": 8463, - "##sia": 8464, - "consequences": 8465, - "posts": 8466, - "shallow": 8467, - "invitation": 8468, - "rode": 8469, - "depot": 8470, - "ernest": 8471, - "kane": 8472, - "rod": 8473, - "concepts": 8474, - "preston": 8475, - "topic": 8476, - "chambers": 8477, - "striking": 8478, - "blast": 8479, - "arrives": 8480, - "descendants": 8481, - "montgomery": 8482, - "ranges": 8483, - "worlds": 8484, - "##lay": 8485, - "##ari": 8486, - "span": 8487, - "chaos": 8488, - "praise": 8489, - "##ag": 8490, - "fewer": 8491, - "1855": 8492, - "sanctuary": 8493, - "mud": 8494, - "fbi": 8495, - "##ions": 8496, - "programmes": 8497, - "maintaining": 8498, - "unity": 8499, - "harper": 8500, - "bore": 8501, - "handsome": 8502, - "closure": 8503, - "tournaments": 8504, - "thunder": 8505, - "nebraska": 8506, - "linda": 8507, - "facade": 8508, - "puts": 8509, - "satisfied": 8510, - "argentine": 8511, - "dale": 8512, - "cork": 8513, - "dome": 8514, - "panama": 8515, - "##yl": 8516, - "1858": 8517, - "tasks": 8518, - "experts": 8519, - "##ates": 8520, - "feeding": 8521, - "equation": 8522, - "##las": 8523, - "##ida": 8524, - "##tu": 8525, - "engage": 8526, - "bryan": 8527, - "##ax": 8528, - "um": 8529, - "quartet": 8530, - "melody": 8531, - "disbanded": 8532, - "sheffield": 8533, - "blocked": 8534, - "gasped": 8535, - "delay": 8536, - "kisses": 8537, - "maggie": 8538, - "connects": 8539, - "##non": 8540, - "sts": 
8541, - "poured": 8542, - "creator": 8543, - "publishers": 8544, - "##we": 8545, - "guided": 8546, - "ellis": 8547, - "extinct": 8548, - "hug": 8549, - "gaining": 8550, - "##ord": 8551, - "complicated": 8552, - "##bility": 8553, - "poll": 8554, - "clenched": 8555, - "investigate": 8556, - "##use": 8557, - "thereby": 8558, - "quantum": 8559, - "spine": 8560, - "cdp": 8561, - "humor": 8562, - "kills": 8563, - "administered": 8564, - "semifinals": 8565, - "##du": 8566, - "encountered": 8567, - "ignore": 8568, - "##bu": 8569, - "commentary": 8570, - "##maker": 8571, - "bother": 8572, - "roosevelt": 8573, - "140": 8574, - "plains": 8575, - "halfway": 8576, - "flowing": 8577, - "cultures": 8578, - "crack": 8579, - "imprisoned": 8580, - "neighboring": 8581, - "airline": 8582, - "##ses": 8583, - "##view": 8584, - "##mate": 8585, - "##ec": 8586, - "gather": 8587, - "wolves": 8588, - "marathon": 8589, - "transformed": 8590, - "##ill": 8591, - "cruise": 8592, - "organisations": 8593, - "carol": 8594, - "punch": 8595, - "exhibitions": 8596, - "numbered": 8597, - "alarm": 8598, - "ratings": 8599, - "daddy": 8600, - "silently": 8601, - "##stein": 8602, - "queens": 8603, - "colours": 8604, - "impression": 8605, - "guidance": 8606, - "liu": 8607, - "tactical": 8608, - "##rat": 8609, - "marshal": 8610, - "della": 8611, - "arrow": 8612, - "##ings": 8613, - "rested": 8614, - "feared": 8615, - "tender": 8616, - "owns": 8617, - "bitter": 8618, - "advisor": 8619, - "escort": 8620, - "##ides": 8621, - "spare": 8622, - "farms": 8623, - "grants": 8624, - "##ene": 8625, - "dragons": 8626, - "encourage": 8627, - "colleagues": 8628, - "cameras": 8629, - "##und": 8630, - "sucked": 8631, - "pile": 8632, - "spirits": 8633, - "prague": 8634, - "statements": 8635, - "suspension": 8636, - "landmark": 8637, - "fence": 8638, - "torture": 8639, - "recreation": 8640, - "bags": 8641, - "permanently": 8642, - "survivors": 8643, - "pond": 8644, - "spy": 8645, - "predecessor": 8646, - "bombing": 8647, - "coup": 8648, - "##og": 8649, - "protecting": 8650, - "transformation": 8651, - "glow": 8652, - "##lands": 8653, - "##book": 8654, - "dug": 8655, - "priests": 8656, - "andrea": 8657, - "feat": 8658, - "barn": 8659, - "jumping": 8660, - "##chen": 8661, - "##ologist": 8662, - "##con": 8663, - "casualties": 8664, - "stern": 8665, - "auckland": 8666, - "pipe": 8667, - "serie": 8668, - "revealing": 8669, - "ba": 8670, - "##bel": 8671, - "trevor": 8672, - "mercy": 8673, - "spectrum": 8674, - "yang": 8675, - "consist": 8676, - "governing": 8677, - "collaborated": 8678, - "possessed": 8679, - "epic": 8680, - "comprises": 8681, - "blew": 8682, - "shane": 8683, - "##ack": 8684, - "lopez": 8685, - "honored": 8686, - "magical": 8687, - "sacrifice": 8688, - "judgment": 8689, - "perceived": 8690, - "hammer": 8691, - "mtv": 8692, - "baronet": 8693, - "tune": 8694, - "das": 8695, - "missionary": 8696, - "sheets": 8697, - "350": 8698, - "neutral": 8699, - "oral": 8700, - "threatening": 8701, - "attractive": 8702, - "shade": 8703, - "aims": 8704, - "seminary": 8705, - "##master": 8706, - "estates": 8707, - "1856": 8708, - "michel": 8709, - "wounds": 8710, - "refugees": 8711, - "manufacturers": 8712, - "##nic": 8713, - "mercury": 8714, - "syndrome": 8715, - "porter": 8716, - "##iya": 8717, - "##din": 8718, - "hamburg": 8719, - "identification": 8720, - "upstairs": 8721, - "purse": 8722, - "widened": 8723, - "pause": 8724, - "cared": 8725, - "breathed": 8726, - "affiliate": 8727, - "santiago": 8728, - "prevented": 8729, - "celtic": 8730, - "fisher": 
8731, - "125": 8732, - "recruited": 8733, - "byzantine": 8734, - "reconstruction": 8735, - "farther": 8736, - "##mp": 8737, - "diet": 8738, - "sake": 8739, - "au": 8740, - "spite": 8741, - "sensation": 8742, - "##ert": 8743, - "blank": 8744, - "separation": 8745, - "105": 8746, - "##hon": 8747, - "vladimir": 8748, - "armies": 8749, - "anime": 8750, - "##lie": 8751, - "accommodate": 8752, - "orbit": 8753, - "cult": 8754, - "sofia": 8755, - "archive": 8756, - "##ify": 8757, - "##box": 8758, - "founders": 8759, - "sustained": 8760, - "disorder": 8761, - "honours": 8762, - "northeastern": 8763, - "mia": 8764, - "crops": 8765, - "violet": 8766, - "threats": 8767, - "blanket": 8768, - "fires": 8769, - "canton": 8770, - "followers": 8771, - "southwestern": 8772, - "prototype": 8773, - "voyage": 8774, - "assignment": 8775, - "altered": 8776, - "moderate": 8777, - "protocol": 8778, - "pistol": 8779, - "##eo": 8780, - "questioned": 8781, - "brass": 8782, - "lifting": 8783, - "1852": 8784, - "math": 8785, - "authored": 8786, - "##ual": 8787, - "doug": 8788, - "dimensional": 8789, - "dynamic": 8790, - "##san": 8791, - "1851": 8792, - "pronounced": 8793, - "grateful": 8794, - "quest": 8795, - "uncomfortable": 8796, - "boom": 8797, - "presidency": 8798, - "stevens": 8799, - "relating": 8800, - "politicians": 8801, - "chen": 8802, - "barrier": 8803, - "quinn": 8804, - "diana": 8805, - "mosque": 8806, - "tribal": 8807, - "cheese": 8808, - "palmer": 8809, - "portions": 8810, - "sometime": 8811, - "chester": 8812, - "treasure": 8813, - "wu": 8814, - "bend": 8815, - "download": 8816, - "millions": 8817, - "reforms": 8818, - "registration": 8819, - "##osa": 8820, - "consequently": 8821, - "monitoring": 8822, - "ate": 8823, - "preliminary": 8824, - "brandon": 8825, - "invented": 8826, - "ps": 8827, - "eaten": 8828, - "exterior": 8829, - "intervention": 8830, - "ports": 8831, - "documented": 8832, - "log": 8833, - "displays": 8834, - "lecture": 8835, - "sally": 8836, - "favourite": 8837, - "##itz": 8838, - "vermont": 8839, - "lo": 8840, - "invisible": 8841, - "isle": 8842, - "breed": 8843, - "##ator": 8844, - "journalists": 8845, - "relay": 8846, - "speaks": 8847, - "backward": 8848, - "explore": 8849, - "midfielder": 8850, - "actively": 8851, - "stefan": 8852, - "procedures": 8853, - "cannon": 8854, - "blond": 8855, - "kenneth": 8856, - "centered": 8857, - "servants": 8858, - "chains": 8859, - "libraries": 8860, - "malcolm": 8861, - "essex": 8862, - "henri": 8863, - "slavery": 8864, - "##hal": 8865, - "facts": 8866, - "fairy": 8867, - "coached": 8868, - "cassie": 8869, - "cats": 8870, - "washed": 8871, - "cop": 8872, - "##fi": 8873, - "announcement": 8874, - "item": 8875, - "2000s": 8876, - "vinyl": 8877, - "activated": 8878, - "marco": 8879, - "frontier": 8880, - "growled": 8881, - "curriculum": 8882, - "##das": 8883, - "loyal": 8884, - "accomplished": 8885, - "leslie": 8886, - "ritual": 8887, - "kenny": 8888, - "##00": 8889, - "vii": 8890, - "napoleon": 8891, - "hollow": 8892, - "hybrid": 8893, - "jungle": 8894, - "stationed": 8895, - "friedrich": 8896, - "counted": 8897, - "##ulated": 8898, - "platinum": 8899, - "theatrical": 8900, - "seated": 8901, - "col": 8902, - "rubber": 8903, - "glen": 8904, - "1840": 8905, - "diversity": 8906, - "healing": 8907, - "extends": 8908, - "id": 8909, - "provisions": 8910, - "administrator": 8911, - "columbus": 8912, - "##oe": 8913, - "tributary": 8914, - "te": 8915, - "assured": 8916, - "org": 8917, - "##uous": 8918, - "prestigious": 8919, - "examined": 8920, - "lectures": 
8921, - "grammy": 8922, - "ronald": 8923, - "associations": 8924, - "bailey": 8925, - "allan": 8926, - "essays": 8927, - "flute": 8928, - "believing": 8929, - "consultant": 8930, - "proceedings": 8931, - "travelling": 8932, - "1853": 8933, - "kit": 8934, - "kerala": 8935, - "yugoslavia": 8936, - "buddy": 8937, - "methodist": 8938, - "##ith": 8939, - "burial": 8940, - "centres": 8941, - "batman": 8942, - "##nda": 8943, - "discontinued": 8944, - "bo": 8945, - "dock": 8946, - "stockholm": 8947, - "lungs": 8948, - "severely": 8949, - "##nk": 8950, - "citing": 8951, - "manga": 8952, - "##ugh": 8953, - "steal": 8954, - "mumbai": 8955, - "iraqi": 8956, - "robot": 8957, - "celebrity": 8958, - "bride": 8959, - "broadcasts": 8960, - "abolished": 8961, - "pot": 8962, - "joel": 8963, - "overhead": 8964, - "franz": 8965, - "packed": 8966, - "reconnaissance": 8967, - "johann": 8968, - "acknowledged": 8969, - "introduce": 8970, - "handled": 8971, - "doctorate": 8972, - "developments": 8973, - "drinks": 8974, - "alley": 8975, - "palestine": 8976, - "##nis": 8977, - "##aki": 8978, - "proceeded": 8979, - "recover": 8980, - "bradley": 8981, - "grain": 8982, - "patch": 8983, - "afford": 8984, - "infection": 8985, - "nationalist": 8986, - "legendary": 8987, - "##ath": 8988, - "interchange": 8989, - "virtually": 8990, - "gen": 8991, - "gravity": 8992, - "exploration": 8993, - "amber": 8994, - "vital": 8995, - "wishes": 8996, - "powell": 8997, - "doctrine": 8998, - "elbow": 8999, - "screenplay": 9000, - "##bird": 9001, - "contribute": 9002, - "indonesian": 9003, - "pet": 9004, - "creates": 9005, - "##com": 9006, - "enzyme": 9007, - "kylie": 9008, - "discipline": 9009, - "drops": 9010, - "manila": 9011, - "hunger": 9012, - "##ien": 9013, - "layers": 9014, - "suffer": 9015, - "fever": 9016, - "bits": 9017, - "monica": 9018, - "keyboard": 9019, - "manages": 9020, - "##hood": 9021, - "searched": 9022, - "appeals": 9023, - "##bad": 9024, - "testament": 9025, - "grande": 9026, - "reid": 9027, - "##war": 9028, - "beliefs": 9029, - "congo": 9030, - "##ification": 9031, - "##dia": 9032, - "si": 9033, - "requiring": 9034, - "##via": 9035, - "casey": 9036, - "1849": 9037, - "regret": 9038, - "streak": 9039, - "rape": 9040, - "depends": 9041, - "syrian": 9042, - "sprint": 9043, - "pound": 9044, - "tourists": 9045, - "upcoming": 9046, - "pub": 9047, - "##xi": 9048, - "tense": 9049, - "##els": 9050, - "practiced": 9051, - "echo": 9052, - "nationwide": 9053, - "guild": 9054, - "motorcycle": 9055, - "liz": 9056, - "##zar": 9057, - "chiefs": 9058, - "desired": 9059, - "elena": 9060, - "bye": 9061, - "precious": 9062, - "absorbed": 9063, - "relatives": 9064, - "booth": 9065, - "pianist": 9066, - "##mal": 9067, - "citizenship": 9068, - "exhausted": 9069, - "wilhelm": 9070, - "##ceae": 9071, - "##hed": 9072, - "noting": 9073, - "quarterback": 9074, - "urge": 9075, - "hectares": 9076, - "##gue": 9077, - "ace": 9078, - "holly": 9079, - "##tal": 9080, - "blonde": 9081, - "davies": 9082, - "parked": 9083, - "sustainable": 9084, - "stepping": 9085, - "twentieth": 9086, - "airfield": 9087, - "galaxy": 9088, - "nest": 9089, - "chip": 9090, - "##nell": 9091, - "tan": 9092, - "shaft": 9093, - "paulo": 9094, - "requirement": 9095, - "##zy": 9096, - "paradise": 9097, - "tobacco": 9098, - "trans": 9099, - "renewed": 9100, - "vietnamese": 9101, - "##cker": 9102, - "##ju": 9103, - "suggesting": 9104, - "catching": 9105, - "holmes": 9106, - "enjoying": 9107, - "md": 9108, - "trips": 9109, - "colt": 9110, - "holder": 9111, - "butterfly": 9112, - 
"nerve": 9113, - "reformed": 9114, - "cherry": 9115, - "bowling": 9116, - "trailer": 9117, - "carriage": 9118, - "goodbye": 9119, - "appreciate": 9120, - "toy": 9121, - "joshua": 9122, - "interactive": 9123, - "enabled": 9124, - "involve": 9125, - "##kan": 9126, - "collar": 9127, - "determination": 9128, - "bunch": 9129, - "facebook": 9130, - "recall": 9131, - "shorts": 9132, - "superintendent": 9133, - "episcopal": 9134, - "frustration": 9135, - "giovanni": 9136, - "nineteenth": 9137, - "laser": 9138, - "privately": 9139, - "array": 9140, - "circulation": 9141, - "##ovic": 9142, - "armstrong": 9143, - "deals": 9144, - "painful": 9145, - "permit": 9146, - "discrimination": 9147, - "##wi": 9148, - "aires": 9149, - "retiring": 9150, - "cottage": 9151, - "ni": 9152, - "##sta": 9153, - "horizon": 9154, - "ellen": 9155, - "jamaica": 9156, - "ripped": 9157, - "fernando": 9158, - "chapters": 9159, - "playstation": 9160, - "patron": 9161, - "lecturer": 9162, - "navigation": 9163, - "behaviour": 9164, - "genes": 9165, - "georgian": 9166, - "export": 9167, - "solomon": 9168, - "rivals": 9169, - "swift": 9170, - "seventeen": 9171, - "rodriguez": 9172, - "princeton": 9173, - "independently": 9174, - "sox": 9175, - "1847": 9176, - "arguing": 9177, - "entity": 9178, - "casting": 9179, - "hank": 9180, - "criteria": 9181, - "oakland": 9182, - "geographic": 9183, - "milwaukee": 9184, - "reflection": 9185, - "expanding": 9186, - "conquest": 9187, - "dubbed": 9188, - "##tv": 9189, - "halt": 9190, - "brave": 9191, - "brunswick": 9192, - "doi": 9193, - "arched": 9194, - "curtis": 9195, - "divorced": 9196, - "predominantly": 9197, - "somerset": 9198, - "streams": 9199, - "ugly": 9200, - "zoo": 9201, - "horrible": 9202, - "curved": 9203, - "buenos": 9204, - "fierce": 9205, - "dictionary": 9206, - "vector": 9207, - "theological": 9208, - "unions": 9209, - "handful": 9210, - "stability": 9211, - "chan": 9212, - "punjab": 9213, - "segments": 9214, - "##lly": 9215, - "altar": 9216, - "ignoring": 9217, - "gesture": 9218, - "monsters": 9219, - "pastor": 9220, - "##stone": 9221, - "thighs": 9222, - "unexpected": 9223, - "operators": 9224, - "abruptly": 9225, - "coin": 9226, - "compiled": 9227, - "associates": 9228, - "improving": 9229, - "migration": 9230, - "pin": 9231, - "##ose": 9232, - "compact": 9233, - "collegiate": 9234, - "reserved": 9235, - "##urs": 9236, - "quarterfinals": 9237, - "roster": 9238, - "restore": 9239, - "assembled": 9240, - "hurry": 9241, - "oval": 9242, - "##cies": 9243, - "1846": 9244, - "flags": 9245, - "martha": 9246, - "##del": 9247, - "victories": 9248, - "sharply": 9249, - "##rated": 9250, - "argues": 9251, - "deadly": 9252, - "neo": 9253, - "drawings": 9254, - "symbols": 9255, - "performer": 9256, - "##iel": 9257, - "griffin": 9258, - "restrictions": 9259, - "editing": 9260, - "andrews": 9261, - "java": 9262, - "journals": 9263, - "arabia": 9264, - "compositions": 9265, - "dee": 9266, - "pierce": 9267, - "removing": 9268, - "hindi": 9269, - "casino": 9270, - "runway": 9271, - "civilians": 9272, - "minds": 9273, - "nasa": 9274, - "hotels": 9275, - "##zation": 9276, - "refuge": 9277, - "rent": 9278, - "retain": 9279, - "potentially": 9280, - "conferences": 9281, - "suburban": 9282, - "conducting": 9283, - "##tto": 9284, - "##tions": 9285, - "##tle": 9286, - "descended": 9287, - "massacre": 9288, - "##cal": 9289, - "ammunition": 9290, - "terrain": 9291, - "fork": 9292, - "souls": 9293, - "counts": 9294, - "chelsea": 9295, - "durham": 9296, - "drives": 9297, - "cab": 9298, - "##bank": 9299, 
- "perth": 9300, - "realizing": 9301, - "palestinian": 9302, - "finn": 9303, - "simpson": 9304, - "##dal": 9305, - "betty": 9306, - "##ule": 9307, - "moreover": 9308, - "particles": 9309, - "cardinals": 9310, - "tent": 9311, - "evaluation": 9312, - "extraordinary": 9313, - "##oid": 9314, - "inscription": 9315, - "##works": 9316, - "wednesday": 9317, - "chloe": 9318, - "maintains": 9319, - "panels": 9320, - "ashley": 9321, - "trucks": 9322, - "##nation": 9323, - "cluster": 9324, - "sunlight": 9325, - "strikes": 9326, - "zhang": 9327, - "##wing": 9328, - "dialect": 9329, - "canon": 9330, - "##ap": 9331, - "tucked": 9332, - "##ws": 9333, - "collecting": 9334, - "##mas": 9335, - "##can": 9336, - "##sville": 9337, - "maker": 9338, - "quoted": 9339, - "evan": 9340, - "franco": 9341, - "aria": 9342, - "buying": 9343, - "cleaning": 9344, - "eva": 9345, - "closet": 9346, - "provision": 9347, - "apollo": 9348, - "clinic": 9349, - "rat": 9350, - "##ez": 9351, - "necessarily": 9352, - "ac": 9353, - "##gle": 9354, - "##ising": 9355, - "venues": 9356, - "flipped": 9357, - "cent": 9358, - "spreading": 9359, - "trustees": 9360, - "checking": 9361, - "authorized": 9362, - "##sco": 9363, - "disappointed": 9364, - "##ado": 9365, - "notion": 9366, - "duration": 9367, - "trumpet": 9368, - "hesitated": 9369, - "topped": 9370, - "brussels": 9371, - "rolls": 9372, - "theoretical": 9373, - "hint": 9374, - "define": 9375, - "aggressive": 9376, - "repeat": 9377, - "wash": 9378, - "peaceful": 9379, - "optical": 9380, - "width": 9381, - "allegedly": 9382, - "mcdonald": 9383, - "strict": 9384, - "copyright": 9385, - "##illa": 9386, - "investors": 9387, - "mar": 9388, - "jam": 9389, - "witnesses": 9390, - "sounding": 9391, - "miranda": 9392, - "michelle": 9393, - "privacy": 9394, - "hugo": 9395, - "harmony": 9396, - "##pp": 9397, - "valid": 9398, - "lynn": 9399, - "glared": 9400, - "nina": 9401, - "102": 9402, - "headquartered": 9403, - "diving": 9404, - "boarding": 9405, - "gibson": 9406, - "##ncy": 9407, - "albanian": 9408, - "marsh": 9409, - "routine": 9410, - "dealt": 9411, - "enhanced": 9412, - "er": 9413, - "intelligent": 9414, - "substance": 9415, - "targeted": 9416, - "enlisted": 9417, - "discovers": 9418, - "spinning": 9419, - "observations": 9420, - "pissed": 9421, - "smoking": 9422, - "rebecca": 9423, - "capitol": 9424, - "visa": 9425, - "varied": 9426, - "costume": 9427, - "seemingly": 9428, - "indies": 9429, - "compensation": 9430, - "surgeon": 9431, - "thursday": 9432, - "arsenal": 9433, - "westminster": 9434, - "suburbs": 9435, - "rid": 9436, - "anglican": 9437, - "##ridge": 9438, - "knots": 9439, - "foods": 9440, - "alumni": 9441, - "lighter": 9442, - "fraser": 9443, - "whoever": 9444, - "portal": 9445, - "scandal": 9446, - "##ray": 9447, - "gavin": 9448, - "advised": 9449, - "instructor": 9450, - "flooding": 9451, - "terrorist": 9452, - "##ale": 9453, - "teenage": 9454, - "interim": 9455, - "senses": 9456, - "duck": 9457, - "teen": 9458, - "thesis": 9459, - "abby": 9460, - "eager": 9461, - "overcome": 9462, - "##ile": 9463, - "newport": 9464, - "glenn": 9465, - "rises": 9466, - "shame": 9467, - "##cc": 9468, - "prompted": 9469, - "priority": 9470, - "forgot": 9471, - "bomber": 9472, - "nicolas": 9473, - "protective": 9474, - "360": 9475, - "cartoon": 9476, - "katherine": 9477, - "breeze": 9478, - "lonely": 9479, - "trusted": 9480, - "henderson": 9481, - "richardson": 9482, - "relax": 9483, - "banner": 9484, - "candy": 9485, - "palms": 9486, - "remarkable": 9487, - "##rio": 9488, - "legends": 9489, - 
"cricketer": 9490, - "essay": 9491, - "ordained": 9492, - "edmund": 9493, - "rifles": 9494, - "trigger": 9495, - "##uri": 9496, - "##away": 9497, - "sail": 9498, - "alert": 9499, - "1830": 9500, - "audiences": 9501, - "penn": 9502, - "sussex": 9503, - "siblings": 9504, - "pursued": 9505, - "indianapolis": 9506, - "resist": 9507, - "rosa": 9508, - "consequence": 9509, - "succeed": 9510, - "avoided": 9511, - "1845": 9512, - "##ulation": 9513, - "inland": 9514, - "##tie": 9515, - "##nna": 9516, - "counsel": 9517, - "profession": 9518, - "chronicle": 9519, - "hurried": 9520, - "##una": 9521, - "eyebrow": 9522, - "eventual": 9523, - "bleeding": 9524, - "innovative": 9525, - "cure": 9526, - "##dom": 9527, - "committees": 9528, - "accounting": 9529, - "con": 9530, - "scope": 9531, - "hardy": 9532, - "heather": 9533, - "tenor": 9534, - "gut": 9535, - "herald": 9536, - "codes": 9537, - "tore": 9538, - "scales": 9539, - "wagon": 9540, - "##oo": 9541, - "luxury": 9542, - "tin": 9543, - "prefer": 9544, - "fountain": 9545, - "triangle": 9546, - "bonds": 9547, - "darling": 9548, - "convoy": 9549, - "dried": 9550, - "traced": 9551, - "beings": 9552, - "troy": 9553, - "accidentally": 9554, - "slam": 9555, - "findings": 9556, - "smelled": 9557, - "joey": 9558, - "lawyers": 9559, - "outcome": 9560, - "steep": 9561, - "bosnia": 9562, - "configuration": 9563, - "shifting": 9564, - "toll": 9565, - "brook": 9566, - "performers": 9567, - "lobby": 9568, - "philosophical": 9569, - "construct": 9570, - "shrine": 9571, - "aggregate": 9572, - "boot": 9573, - "cox": 9574, - "phenomenon": 9575, - "savage": 9576, - "insane": 9577, - "solely": 9578, - "reynolds": 9579, - "lifestyle": 9580, - "##ima": 9581, - "nationally": 9582, - "holdings": 9583, - "consideration": 9584, - "enable": 9585, - "edgar": 9586, - "mo": 9587, - "mama": 9588, - "##tein": 9589, - "fights": 9590, - "relegation": 9591, - "chances": 9592, - "atomic": 9593, - "hub": 9594, - "conjunction": 9595, - "awkward": 9596, - "reactions": 9597, - "currency": 9598, - "finale": 9599, - "kumar": 9600, - "underwent": 9601, - "steering": 9602, - "elaborate": 9603, - "gifts": 9604, - "comprising": 9605, - "melissa": 9606, - "veins": 9607, - "reasonable": 9608, - "sunshine": 9609, - "chi": 9610, - "solve": 9611, - "trails": 9612, - "inhabited": 9613, - "elimination": 9614, - "ethics": 9615, - "huh": 9616, - "ana": 9617, - "molly": 9618, - "consent": 9619, - "apartments": 9620, - "layout": 9621, - "marines": 9622, - "##ces": 9623, - "hunters": 9624, - "bulk": 9625, - "##oma": 9626, - "hometown": 9627, - "##wall": 9628, - "##mont": 9629, - "cracked": 9630, - "reads": 9631, - "neighbouring": 9632, - "withdrawn": 9633, - "admission": 9634, - "wingspan": 9635, - "damned": 9636, - "anthology": 9637, - "lancashire": 9638, - "brands": 9639, - "batting": 9640, - "forgive": 9641, - "cuban": 9642, - "awful": 9643, - "##lyn": 9644, - "104": 9645, - "dimensions": 9646, - "imagination": 9647, - "##ade": 9648, - "dante": 9649, - "##ship": 9650, - "tracking": 9651, - "desperately": 9652, - "goalkeeper": 9653, - "##yne": 9654, - "groaned": 9655, - "workshops": 9656, - "confident": 9657, - "burton": 9658, - "gerald": 9659, - "milton": 9660, - "circus": 9661, - "uncertain": 9662, - "slope": 9663, - "copenhagen": 9664, - "sophia": 9665, - "fog": 9666, - "philosopher": 9667, - "portraits": 9668, - "accent": 9669, - "cycling": 9670, - "varying": 9671, - "gripped": 9672, - "larvae": 9673, - "garrett": 9674, - "specified": 9675, - "scotia": 9676, - "mature": 9677, - "luther": 9678, - 
"kurt": 9679, - "rap": 9680, - "##kes": 9681, - "aerial": 9682, - "750": 9683, - "ferdinand": 9684, - "heated": 9685, - "es": 9686, - "transported": 9687, - "##shan": 9688, - "safely": 9689, - "nonetheless": 9690, - "##orn": 9691, - "##gal": 9692, - "motors": 9693, - "demanding": 9694, - "##sburg": 9695, - "startled": 9696, - "##brook": 9697, - "ally": 9698, - "generate": 9699, - "caps": 9700, - "ghana": 9701, - "stained": 9702, - "demo": 9703, - "mentions": 9704, - "beds": 9705, - "ap": 9706, - "afterward": 9707, - "diary": 9708, - "##bling": 9709, - "utility": 9710, - "##iro": 9711, - "richards": 9712, - "1837": 9713, - "conspiracy": 9714, - "conscious": 9715, - "shining": 9716, - "footsteps": 9717, - "observer": 9718, - "cyprus": 9719, - "urged": 9720, - "loyalty": 9721, - "developer": 9722, - "probability": 9723, - "olive": 9724, - "upgraded": 9725, - "gym": 9726, - "miracle": 9727, - "insects": 9728, - "graves": 9729, - "1844": 9730, - "ourselves": 9731, - "hydrogen": 9732, - "amazon": 9733, - "katie": 9734, - "tickets": 9735, - "poets": 9736, - "##pm": 9737, - "planes": 9738, - "##pan": 9739, - "prevention": 9740, - "witnessed": 9741, - "dense": 9742, - "jin": 9743, - "randy": 9744, - "tang": 9745, - "warehouse": 9746, - "monroe": 9747, - "bang": 9748, - "archived": 9749, - "elderly": 9750, - "investigations": 9751, - "alec": 9752, - "granite": 9753, - "mineral": 9754, - "conflicts": 9755, - "controlling": 9756, - "aboriginal": 9757, - "carlo": 9758, - "##zu": 9759, - "mechanics": 9760, - "stan": 9761, - "stark": 9762, - "rhode": 9763, - "skirt": 9764, - "est": 9765, - "##berry": 9766, - "bombs": 9767, - "respected": 9768, - "##horn": 9769, - "imposed": 9770, - "limestone": 9771, - "deny": 9772, - "nominee": 9773, - "memphis": 9774, - "grabbing": 9775, - "disabled": 9776, - "##als": 9777, - "amusement": 9778, - "aa": 9779, - "frankfurt": 9780, - "corn": 9781, - "referendum": 9782, - "varies": 9783, - "slowed": 9784, - "disk": 9785, - "firms": 9786, - "unconscious": 9787, - "incredible": 9788, - "clue": 9789, - "sue": 9790, - "##zhou": 9791, - "twist": 9792, - "##cio": 9793, - "joins": 9794, - "idaho": 9795, - "chad": 9796, - "developers": 9797, - "computing": 9798, - "destroyer": 9799, - "103": 9800, - "mortal": 9801, - "tucker": 9802, - "kingston": 9803, - "choices": 9804, - "yu": 9805, - "carson": 9806, - "1800": 9807, - "os": 9808, - "whitney": 9809, - "geneva": 9810, - "pretend": 9811, - "dimension": 9812, - "staged": 9813, - "plateau": 9814, - "maya": 9815, - "##une": 9816, - "freestyle": 9817, - "##bc": 9818, - "rovers": 9819, - "hiv": 9820, - "##ids": 9821, - "tristan": 9822, - "classroom": 9823, - "prospect": 9824, - "##hus": 9825, - "honestly": 9826, - "diploma": 9827, - "lied": 9828, - "thermal": 9829, - "auxiliary": 9830, - "feast": 9831, - "unlikely": 9832, - "iata": 9833, - "##tel": 9834, - "morocco": 9835, - "pounding": 9836, - "treasury": 9837, - "lithuania": 9838, - "considerably": 9839, - "1841": 9840, - "dish": 9841, - "1812": 9842, - "geological": 9843, - "matching": 9844, - "stumbled": 9845, - "destroying": 9846, - "marched": 9847, - "brien": 9848, - "advances": 9849, - "cake": 9850, - "nicole": 9851, - "belle": 9852, - "settling": 9853, - "measuring": 9854, - "directing": 9855, - "##mie": 9856, - "tuesday": 9857, - "bassist": 9858, - "capabilities": 9859, - "stunned": 9860, - "fraud": 9861, - "torpedo": 9862, - "##list": 9863, - "##phone": 9864, - "anton": 9865, - "wisdom": 9866, - "surveillance": 9867, - "ruined": 9868, - "##ulate": 9869, - "lawsuit": 9870, - 
"healthcare": 9871, - "theorem": 9872, - "halls": 9873, - "trend": 9874, - "aka": 9875, - "horizontal": 9876, - "dozens": 9877, - "acquire": 9878, - "lasting": 9879, - "swim": 9880, - "hawk": 9881, - "gorgeous": 9882, - "fees": 9883, - "vicinity": 9884, - "decrease": 9885, - "adoption": 9886, - "tactics": 9887, - "##ography": 9888, - "pakistani": 9889, - "##ole": 9890, - "draws": 9891, - "##hall": 9892, - "willie": 9893, - "burke": 9894, - "heath": 9895, - "algorithm": 9896, - "integral": 9897, - "powder": 9898, - "elliott": 9899, - "brigadier": 9900, - "jackie": 9901, - "tate": 9902, - "varieties": 9903, - "darker": 9904, - "##cho": 9905, - "lately": 9906, - "cigarette": 9907, - "specimens": 9908, - "adds": 9909, - "##ree": 9910, - "##ensis": 9911, - "##inger": 9912, - "exploded": 9913, - "finalist": 9914, - "cia": 9915, - "murders": 9916, - "wilderness": 9917, - "arguments": 9918, - "nicknamed": 9919, - "acceptance": 9920, - "onwards": 9921, - "manufacture": 9922, - "robertson": 9923, - "jets": 9924, - "tampa": 9925, - "enterprises": 9926, - "blog": 9927, - "loudly": 9928, - "composers": 9929, - "nominations": 9930, - "1838": 9931, - "ai": 9932, - "malta": 9933, - "inquiry": 9934, - "automobile": 9935, - "hosting": 9936, - "viii": 9937, - "rays": 9938, - "tilted": 9939, - "grief": 9940, - "museums": 9941, - "strategies": 9942, - "furious": 9943, - "euro": 9944, - "equality": 9945, - "cohen": 9946, - "poison": 9947, - "surrey": 9948, - "wireless": 9949, - "governed": 9950, - "ridiculous": 9951, - "moses": 9952, - "##esh": 9953, - "##room": 9954, - "vanished": 9955, - "##ito": 9956, - "barnes": 9957, - "attract": 9958, - "morrison": 9959, - "istanbul": 9960, - "##iness": 9961, - "absent": 9962, - "rotation": 9963, - "petition": 9964, - "janet": 9965, - "##logical": 9966, - "satisfaction": 9967, - "custody": 9968, - "deliberately": 9969, - "observatory": 9970, - "comedian": 9971, - "surfaces": 9972, - "pinyin": 9973, - "novelist": 9974, - "strictly": 9975, - "canterbury": 9976, - "oslo": 9977, - "monks": 9978, - "embrace": 9979, - "ibm": 9980, - "jealous": 9981, - "photograph": 9982, - "continent": 9983, - "dorothy": 9984, - "marina": 9985, - "doc": 9986, - "excess": 9987, - "holden": 9988, - "allegations": 9989, - "explaining": 9990, - "stack": 9991, - "avoiding": 9992, - "lance": 9993, - "storyline": 9994, - "majesty": 9995, - "poorly": 9996, - "spike": 9997, - "dos": 9998, - "bradford": 9999, - "raven": 10000, - "travis": 10001, - "classics": 10002, - "proven": 10003, - "voltage": 10004, - "pillow": 10005, - "fists": 10006, - "butt": 10007, - "1842": 10008, - "interpreted": 10009, - "##car": 10010, - "1839": 10011, - "gage": 10012, - "telegraph": 10013, - "lens": 10014, - "promising": 10015, - "expelled": 10016, - "casual": 10017, - "collector": 10018, - "zones": 10019, - "##min": 10020, - "silly": 10021, - "nintendo": 10022, - "##kh": 10023, - "##bra": 10024, - "downstairs": 10025, - "chef": 10026, - "suspicious": 10027, - "afl": 10028, - "flies": 10029, - "vacant": 10030, - "uganda": 10031, - "pregnancy": 10032, - "condemned": 10033, - "lutheran": 10034, - "estimates": 10035, - "cheap": 10036, - "decree": 10037, - "saxon": 10038, - "proximity": 10039, - "stripped": 10040, - "idiot": 10041, - "deposits": 10042, - "contrary": 10043, - "presenter": 10044, - "magnus": 10045, - "glacier": 10046, - "im": 10047, - "offense": 10048, - "edwin": 10049, - "##ori": 10050, - "upright": 10051, - "##long": 10052, - "bolt": 10053, - "##ois": 10054, - "toss": 10055, - "geographical": 10056, - "##izes": 
10057, - "environments": 10058, - "delicate": 10059, - "marking": 10060, - "abstract": 10061, - "xavier": 10062, - "nails": 10063, - "windsor": 10064, - "plantation": 10065, - "occurring": 10066, - "equity": 10067, - "saskatchewan": 10068, - "fears": 10069, - "drifted": 10070, - "sequences": 10071, - "vegetation": 10072, - "revolt": 10073, - "##stic": 10074, - "1843": 10075, - "sooner": 10076, - "fusion": 10077, - "opposing": 10078, - "nato": 10079, - "skating": 10080, - "1836": 10081, - "secretly": 10082, - "ruin": 10083, - "lease": 10084, - "##oc": 10085, - "edit": 10086, - "##nne": 10087, - "flora": 10088, - "anxiety": 10089, - "ruby": 10090, - "##ological": 10091, - "##mia": 10092, - "tel": 10093, - "bout": 10094, - "taxi": 10095, - "emmy": 10096, - "frost": 10097, - "rainbow": 10098, - "compounds": 10099, - "foundations": 10100, - "rainfall": 10101, - "assassination": 10102, - "nightmare": 10103, - "dominican": 10104, - "##win": 10105, - "achievements": 10106, - "deserve": 10107, - "orlando": 10108, - "intact": 10109, - "armenia": 10110, - "##nte": 10111, - "calgary": 10112, - "valentine": 10113, - "106": 10114, - "marion": 10115, - "proclaimed": 10116, - "theodore": 10117, - "bells": 10118, - "courtyard": 10119, - "thigh": 10120, - "gonzalez": 10121, - "console": 10122, - "troop": 10123, - "minimal": 10124, - "monte": 10125, - "everyday": 10126, - "##ence": 10127, - "##if": 10128, - "supporter": 10129, - "terrorism": 10130, - "buck": 10131, - "openly": 10132, - "presbyterian": 10133, - "activists": 10134, - "carpet": 10135, - "##iers": 10136, - "rubbing": 10137, - "uprising": 10138, - "##yi": 10139, - "cute": 10140, - "conceived": 10141, - "legally": 10142, - "##cht": 10143, - "millennium": 10144, - "cello": 10145, - "velocity": 10146, - "ji": 10147, - "rescued": 10148, - "cardiff": 10149, - "1835": 10150, - "rex": 10151, - "concentrate": 10152, - "senators": 10153, - "beard": 10154, - "rendered": 10155, - "glowing": 10156, - "battalions": 10157, - "scouts": 10158, - "competitors": 10159, - "sculptor": 10160, - "catalogue": 10161, - "arctic": 10162, - "ion": 10163, - "raja": 10164, - "bicycle": 10165, - "wow": 10166, - "glancing": 10167, - "lawn": 10168, - "##woman": 10169, - "gentleman": 10170, - "lighthouse": 10171, - "publish": 10172, - "predicted": 10173, - "calculated": 10174, - "##val": 10175, - "variants": 10176, - "##gne": 10177, - "strain": 10178, - "##ui": 10179, - "winston": 10180, - "deceased": 10181, - "##nus": 10182, - "touchdowns": 10183, - "brady": 10184, - "caleb": 10185, - "sinking": 10186, - "echoed": 10187, - "crush": 10188, - "hon": 10189, - "blessed": 10190, - "protagonist": 10191, - "hayes": 10192, - "endangered": 10193, - "magnitude": 10194, - "editors": 10195, - "##tine": 10196, - "estimate": 10197, - "responsibilities": 10198, - "##mel": 10199, - "backup": 10200, - "laying": 10201, - "consumed": 10202, - "sealed": 10203, - "zurich": 10204, - "lovers": 10205, - "frustrated": 10206, - "##eau": 10207, - "ahmed": 10208, - "kicking": 10209, - "mit": 10210, - "treasurer": 10211, - "1832": 10212, - "biblical": 10213, - "refuse": 10214, - "terrified": 10215, - "pump": 10216, - "agrees": 10217, - "genuine": 10218, - "imprisonment": 10219, - "refuses": 10220, - "plymouth": 10221, - "##hen": 10222, - "lou": 10223, - "##nen": 10224, - "tara": 10225, - "trembling": 10226, - "antarctic": 10227, - "ton": 10228, - "learns": 10229, - "##tas": 10230, - "crap": 10231, - "crucial": 10232, - "faction": 10233, - "atop": 10234, - "##borough": 10235, - "wrap": 10236, - "lancaster": 
10237, - "odds": 10238, - "hopkins": 10239, - "erik": 10240, - "lyon": 10241, - "##eon": 10242, - "bros": 10243, - "##ode": 10244, - "snap": 10245, - "locality": 10246, - "tips": 10247, - "empress": 10248, - "crowned": 10249, - "cal": 10250, - "acclaimed": 10251, - "chuckled": 10252, - "##ory": 10253, - "clara": 10254, - "sends": 10255, - "mild": 10256, - "towel": 10257, - "##fl": 10258, - "##day": 10259, - "##а": 10260, - "wishing": 10261, - "assuming": 10262, - "interviewed": 10263, - "##bal": 10264, - "##die": 10265, - "interactions": 10266, - "eden": 10267, - "cups": 10268, - "helena": 10269, - "##lf": 10270, - "indie": 10271, - "beck": 10272, - "##fire": 10273, - "batteries": 10274, - "filipino": 10275, - "wizard": 10276, - "parted": 10277, - "##lam": 10278, - "traces": 10279, - "##born": 10280, - "rows": 10281, - "idol": 10282, - "albany": 10283, - "delegates": 10284, - "##ees": 10285, - "##sar": 10286, - "discussions": 10287, - "##ex": 10288, - "notre": 10289, - "instructed": 10290, - "belgrade": 10291, - "highways": 10292, - "suggestion": 10293, - "lauren": 10294, - "possess": 10295, - "orientation": 10296, - "alexandria": 10297, - "abdul": 10298, - "beats": 10299, - "salary": 10300, - "reunion": 10301, - "ludwig": 10302, - "alright": 10303, - "wagner": 10304, - "intimate": 10305, - "pockets": 10306, - "slovenia": 10307, - "hugged": 10308, - "brighton": 10309, - "merchants": 10310, - "cruel": 10311, - "stole": 10312, - "trek": 10313, - "slopes": 10314, - "repairs": 10315, - "enrollment": 10316, - "politically": 10317, - "underlying": 10318, - "promotional": 10319, - "counting": 10320, - "boeing": 10321, - "##bb": 10322, - "isabella": 10323, - "naming": 10324, - "##и": 10325, - "keen": 10326, - "bacteria": 10327, - "listing": 10328, - "separately": 10329, - "belfast": 10330, - "ussr": 10331, - "450": 10332, - "lithuanian": 10333, - "anybody": 10334, - "ribs": 10335, - "sphere": 10336, - "martinez": 10337, - "cock": 10338, - "embarrassed": 10339, - "proposals": 10340, - "fragments": 10341, - "nationals": 10342, - "##fs": 10343, - "##wski": 10344, - "premises": 10345, - "fin": 10346, - "1500": 10347, - "alpine": 10348, - "matched": 10349, - "freely": 10350, - "bounded": 10351, - "jace": 10352, - "sleeve": 10353, - "##af": 10354, - "gaming": 10355, - "pier": 10356, - "populated": 10357, - "evident": 10358, - "##like": 10359, - "frances": 10360, - "flooded": 10361, - "##dle": 10362, - "frightened": 10363, - "pour": 10364, - "trainer": 10365, - "framed": 10366, - "visitor": 10367, - "challenging": 10368, - "pig": 10369, - "wickets": 10370, - "##fold": 10371, - "infected": 10372, - "email": 10373, - "##pes": 10374, - "arose": 10375, - "##aw": 10376, - "reward": 10377, - "ecuador": 10378, - "oblast": 10379, - "vale": 10380, - "ch": 10381, - "shuttle": 10382, - "##usa": 10383, - "bach": 10384, - "rankings": 10385, - "forbidden": 10386, - "cornwall": 10387, - "accordance": 10388, - "salem": 10389, - "consumers": 10390, - "bruno": 10391, - "fantastic": 10392, - "toes": 10393, - "machinery": 10394, - "resolved": 10395, - "julius": 10396, - "remembering": 10397, - "propaganda": 10398, - "iceland": 10399, - "bombardment": 10400, - "tide": 10401, - "contacts": 10402, - "wives": 10403, - "##rah": 10404, - "concerto": 10405, - "macdonald": 10406, - "albania": 10407, - "implement": 10408, - "daisy": 10409, - "tapped": 10410, - "sudan": 10411, - "helmet": 10412, - "angela": 10413, - "mistress": 10414, - "##lic": 10415, - "crop": 10416, - "sunk": 10417, - "finest": 10418, - "##craft": 10419, - 
"hostile": 10420, - "##ute": 10421, - "##tsu": 10422, - "boxer": 10423, - "fr": 10424, - "paths": 10425, - "adjusted": 10426, - "habit": 10427, - "ballot": 10428, - "supervision": 10429, - "soprano": 10430, - "##zen": 10431, - "bullets": 10432, - "wicked": 10433, - "sunset": 10434, - "regiments": 10435, - "disappear": 10436, - "lamp": 10437, - "performs": 10438, - "app": 10439, - "##gia": 10440, - "##oa": 10441, - "rabbit": 10442, - "digging": 10443, - "incidents": 10444, - "entries": 10445, - "##cion": 10446, - "dishes": 10447, - "##oi": 10448, - "introducing": 10449, - "##ati": 10450, - "##fied": 10451, - "freshman": 10452, - "slot": 10453, - "jill": 10454, - "tackles": 10455, - "baroque": 10456, - "backs": 10457, - "##iest": 10458, - "lone": 10459, - "sponsor": 10460, - "destiny": 10461, - "altogether": 10462, - "convert": 10463, - "##aro": 10464, - "consensus": 10465, - "shapes": 10466, - "demonstration": 10467, - "basically": 10468, - "feminist": 10469, - "auction": 10470, - "artifacts": 10471, - "##bing": 10472, - "strongest": 10473, - "twitter": 10474, - "halifax": 10475, - "2019": 10476, - "allmusic": 10477, - "mighty": 10478, - "smallest": 10479, - "precise": 10480, - "alexandra": 10481, - "viola": 10482, - "##los": 10483, - "##ille": 10484, - "manuscripts": 10485, - "##illo": 10486, - "dancers": 10487, - "ari": 10488, - "managers": 10489, - "monuments": 10490, - "blades": 10491, - "barracks": 10492, - "springfield": 10493, - "maiden": 10494, - "consolidated": 10495, - "electron": 10496, - "##end": 10497, - "berry": 10498, - "airing": 10499, - "wheat": 10500, - "nobel": 10501, - "inclusion": 10502, - "blair": 10503, - "payments": 10504, - "geography": 10505, - "bee": 10506, - "cc": 10507, - "eleanor": 10508, - "react": 10509, - "##hurst": 10510, - "afc": 10511, - "manitoba": 10512, - "##yu": 10513, - "su": 10514, - "lineup": 10515, - "fitness": 10516, - "recreational": 10517, - "investments": 10518, - "airborne": 10519, - "disappointment": 10520, - "##dis": 10521, - "edmonton": 10522, - "viewing": 10523, - "##row": 10524, - "renovation": 10525, - "##cast": 10526, - "infant": 10527, - "bankruptcy": 10528, - "roses": 10529, - "aftermath": 10530, - "pavilion": 10531, - "##yer": 10532, - "carpenter": 10533, - "withdrawal": 10534, - "ladder": 10535, - "##hy": 10536, - "discussing": 10537, - "popped": 10538, - "reliable": 10539, - "agreements": 10540, - "rochester": 10541, - "##abad": 10542, - "curves": 10543, - "bombers": 10544, - "220": 10545, - "rao": 10546, - "reverend": 10547, - "decreased": 10548, - "choosing": 10549, - "107": 10550, - "stiff": 10551, - "consulting": 10552, - "naples": 10553, - "crawford": 10554, - "tracy": 10555, - "ka": 10556, - "ribbon": 10557, - "cops": 10558, - "##lee": 10559, - "crushed": 10560, - "deciding": 10561, - "unified": 10562, - "teenager": 10563, - "accepting": 10564, - "flagship": 10565, - "explorer": 10566, - "poles": 10567, - "sanchez": 10568, - "inspection": 10569, - "revived": 10570, - "skilled": 10571, - "induced": 10572, - "exchanged": 10573, - "flee": 10574, - "locals": 10575, - "tragedy": 10576, - "swallow": 10577, - "loading": 10578, - "hanna": 10579, - "demonstrate": 10580, - "##ela": 10581, - "salvador": 10582, - "flown": 10583, - "contestants": 10584, - "civilization": 10585, - "##ines": 10586, - "wanna": 10587, - "rhodes": 10588, - "fletcher": 10589, - "hector": 10590, - "knocking": 10591, - "considers": 10592, - "##ough": 10593, - "nash": 10594, - "mechanisms": 10595, - "sensed": 10596, - "mentally": 10597, - "walt": 10598, - 
"unclear": 10599, - "##eus": 10600, - "renovated": 10601, - "madame": 10602, - "##cks": 10603, - "crews": 10604, - "governmental": 10605, - "##hin": 10606, - "undertaken": 10607, - "monkey": 10608, - "##ben": 10609, - "##ato": 10610, - "fatal": 10611, - "armored": 10612, - "copa": 10613, - "caves": 10614, - "governance": 10615, - "grasp": 10616, - "perception": 10617, - "certification": 10618, - "froze": 10619, - "damp": 10620, - "tugged": 10621, - "wyoming": 10622, - "##rg": 10623, - "##ero": 10624, - "newman": 10625, - "##lor": 10626, - "nerves": 10627, - "curiosity": 10628, - "graph": 10629, - "115": 10630, - "##ami": 10631, - "withdraw": 10632, - "tunnels": 10633, - "dull": 10634, - "meredith": 10635, - "moss": 10636, - "exhibits": 10637, - "neighbors": 10638, - "communicate": 10639, - "accuracy": 10640, - "explored": 10641, - "raiders": 10642, - "republicans": 10643, - "secular": 10644, - "kat": 10645, - "superman": 10646, - "penny": 10647, - "criticised": 10648, - "##tch": 10649, - "freed": 10650, - "update": 10651, - "conviction": 10652, - "wade": 10653, - "ham": 10654, - "likewise": 10655, - "delegation": 10656, - "gotta": 10657, - "doll": 10658, - "promises": 10659, - "technological": 10660, - "myth": 10661, - "nationality": 10662, - "resolve": 10663, - "convent": 10664, - "##mark": 10665, - "sharon": 10666, - "dig": 10667, - "sip": 10668, - "coordinator": 10669, - "entrepreneur": 10670, - "fold": 10671, - "##dine": 10672, - "capability": 10673, - "councillor": 10674, - "synonym": 10675, - "blown": 10676, - "swan": 10677, - "cursed": 10678, - "1815": 10679, - "jonas": 10680, - "haired": 10681, - "sofa": 10682, - "canvas": 10683, - "keeper": 10684, - "rivalry": 10685, - "##hart": 10686, - "rapper": 10687, - "speedway": 10688, - "swords": 10689, - "postal": 10690, - "maxwell": 10691, - "estonia": 10692, - "potter": 10693, - "recurring": 10694, - "##nn": 10695, - "##ave": 10696, - "errors": 10697, - "##oni": 10698, - "cognitive": 10699, - "1834": 10700, - "##²": 10701, - "claws": 10702, - "nadu": 10703, - "roberto": 10704, - "bce": 10705, - "wrestler": 10706, - "ellie": 10707, - "##ations": 10708, - "infinite": 10709, - "ink": 10710, - "##tia": 10711, - "presumably": 10712, - "finite": 10713, - "staircase": 10714, - "108": 10715, - "noel": 10716, - "patricia": 10717, - "nacional": 10718, - "##cation": 10719, - "chill": 10720, - "eternal": 10721, - "tu": 10722, - "preventing": 10723, - "prussia": 10724, - "fossil": 10725, - "limbs": 10726, - "##logist": 10727, - "ernst": 10728, - "frog": 10729, - "perez": 10730, - "rene": 10731, - "##ace": 10732, - "pizza": 10733, - "prussian": 10734, - "##ios": 10735, - "##vy": 10736, - "molecules": 10737, - "regulatory": 10738, - "answering": 10739, - "opinions": 10740, - "sworn": 10741, - "lengths": 10742, - "supposedly": 10743, - "hypothesis": 10744, - "upward": 10745, - "habitats": 10746, - "seating": 10747, - "ancestors": 10748, - "drank": 10749, - "yield": 10750, - "hd": 10751, - "synthesis": 10752, - "researcher": 10753, - "modest": 10754, - "##var": 10755, - "mothers": 10756, - "peered": 10757, - "voluntary": 10758, - "homeland": 10759, - "##the": 10760, - "acclaim": 10761, - "##igan": 10762, - "static": 10763, - "valve": 10764, - "luxembourg": 10765, - "alto": 10766, - "carroll": 10767, - "fe": 10768, - "receptor": 10769, - "norton": 10770, - "ambulance": 10771, - "##tian": 10772, - "johnston": 10773, - "catholics": 10774, - "depicting": 10775, - "jointly": 10776, - "elephant": 10777, - "gloria": 10778, - "mentor": 10779, - "badge": 10780, - 
"ahmad": 10781, - "distinguish": 10782, - "remarked": 10783, - "councils": 10784, - "precisely": 10785, - "allison": 10786, - "advancing": 10787, - "detection": 10788, - "crowded": 10789, - "##10": 10790, - "cooperative": 10791, - "ankle": 10792, - "mercedes": 10793, - "dagger": 10794, - "surrendered": 10795, - "pollution": 10796, - "commit": 10797, - "subway": 10798, - "jeffrey": 10799, - "lesson": 10800, - "sculptures": 10801, - "provider": 10802, - "##fication": 10803, - "membrane": 10804, - "timothy": 10805, - "rectangular": 10806, - "fiscal": 10807, - "heating": 10808, - "teammate": 10809, - "basket": 10810, - "particle": 10811, - "anonymous": 10812, - "deployment": 10813, - "##ple": 10814, - "missiles": 10815, - "courthouse": 10816, - "proportion": 10817, - "shoe": 10818, - "sec": 10819, - "##ller": 10820, - "complaints": 10821, - "forbes": 10822, - "blacks": 10823, - "abandon": 10824, - "remind": 10825, - "sizes": 10826, - "overwhelming": 10827, - "autobiography": 10828, - "natalie": 10829, - "##awa": 10830, - "risks": 10831, - "contestant": 10832, - "countryside": 10833, - "babies": 10834, - "scorer": 10835, - "invaded": 10836, - "enclosed": 10837, - "proceed": 10838, - "hurling": 10839, - "disorders": 10840, - "##cu": 10841, - "reflecting": 10842, - "continuously": 10843, - "cruiser": 10844, - "graduates": 10845, - "freeway": 10846, - "investigated": 10847, - "ore": 10848, - "deserved": 10849, - "maid": 10850, - "blocking": 10851, - "phillip": 10852, - "jorge": 10853, - "shakes": 10854, - "dove": 10855, - "mann": 10856, - "variables": 10857, - "lacked": 10858, - "burden": 10859, - "accompanying": 10860, - "que": 10861, - "consistently": 10862, - "organizing": 10863, - "provisional": 10864, - "complained": 10865, - "endless": 10866, - "##rm": 10867, - "tubes": 10868, - "juice": 10869, - "georges": 10870, - "krishna": 10871, - "mick": 10872, - "labels": 10873, - "thriller": 10874, - "##uch": 10875, - "laps": 10876, - "arcade": 10877, - "sage": 10878, - "snail": 10879, - "##table": 10880, - "shannon": 10881, - "fi": 10882, - "laurence": 10883, - "seoul": 10884, - "vacation": 10885, - "presenting": 10886, - "hire": 10887, - "churchill": 10888, - "surprisingly": 10889, - "prohibited": 10890, - "savannah": 10891, - "technically": 10892, - "##oli": 10893, - "170": 10894, - "##lessly": 10895, - "testimony": 10896, - "suited": 10897, - "speeds": 10898, - "toys": 10899, - "romans": 10900, - "mlb": 10901, - "flowering": 10902, - "measurement": 10903, - "talented": 10904, - "kay": 10905, - "settings": 10906, - "charleston": 10907, - "expectations": 10908, - "shattered": 10909, - "achieving": 10910, - "triumph": 10911, - "ceremonies": 10912, - "portsmouth": 10913, - "lanes": 10914, - "mandatory": 10915, - "loser": 10916, - "stretching": 10917, - "cologne": 10918, - "realizes": 10919, - "seventy": 10920, - "cornell": 10921, - "careers": 10922, - "webb": 10923, - "##ulating": 10924, - "americas": 10925, - "budapest": 10926, - "ava": 10927, - "suspicion": 10928, - "##ison": 10929, - "yo": 10930, - "conrad": 10931, - "##hai": 10932, - "sterling": 10933, - "jessie": 10934, - "rector": 10935, - "##az": 10936, - "1831": 10937, - "transform": 10938, - "organize": 10939, - "loans": 10940, - "christine": 10941, - "volcanic": 10942, - "warrant": 10943, - "slender": 10944, - "summers": 10945, - "subfamily": 10946, - "newer": 10947, - "danced": 10948, - "dynamics": 10949, - "rhine": 10950, - "proceeds": 10951, - "heinrich": 10952, - "gastropod": 10953, - "commands": 10954, - "sings": 10955, - "facilitate": 
10956, - "easter": 10957, - "ra": 10958, - "positioned": 10959, - "responses": 10960, - "expense": 10961, - "fruits": 10962, - "yanked": 10963, - "imported": 10964, - "25th": 10965, - "velvet": 10966, - "vic": 10967, - "primitive": 10968, - "tribune": 10969, - "baldwin": 10970, - "neighbourhood": 10971, - "donna": 10972, - "rip": 10973, - "hay": 10974, - "pr": 10975, - "##uro": 10976, - "1814": 10977, - "espn": 10978, - "welcomed": 10979, - "##aria": 10980, - "qualifier": 10981, - "glare": 10982, - "highland": 10983, - "timing": 10984, - "##cted": 10985, - "shells": 10986, - "eased": 10987, - "geometry": 10988, - "louder": 10989, - "exciting": 10990, - "slovakia": 10991, - "##sion": 10992, - "##iz": 10993, - "##lot": 10994, - "savings": 10995, - "prairie": 10996, - "##ques": 10997, - "marching": 10998, - "rafael": 10999, - "tonnes": 11000, - "##lled": 11001, - "curtain": 11002, - "preceding": 11003, - "shy": 11004, - "heal": 11005, - "greene": 11006, - "worthy": 11007, - "##pot": 11008, - "detachment": 11009, - "bury": 11010, - "sherman": 11011, - "##eck": 11012, - "reinforced": 11013, - "seeks": 11014, - "bottles": 11015, - "contracted": 11016, - "duchess": 11017, - "outfit": 11018, - "walsh": 11019, - "##sc": 11020, - "mickey": 11021, - "##ase": 11022, - "geoffrey": 11023, - "archer": 11024, - "squeeze": 11025, - "dawson": 11026, - "eliminate": 11027, - "invention": 11028, - "##enberg": 11029, - "neal": 11030, - "##eth": 11031, - "stance": 11032, - "dealer": 11033, - "coral": 11034, - "maple": 11035, - "retire": 11036, - "polo": 11037, - "simplified": 11038, - "##ht": 11039, - "1833": 11040, - "hid": 11041, - "watts": 11042, - "backwards": 11043, - "jules": 11044, - "##oke": 11045, - "genesis": 11046, - "mt": 11047, - "frames": 11048, - "rebounds": 11049, - "burma": 11050, - "woodland": 11051, - "moist": 11052, - "santos": 11053, - "whispers": 11054, - "drained": 11055, - "subspecies": 11056, - "##aa": 11057, - "streaming": 11058, - "ulster": 11059, - "burnt": 11060, - "correspondence": 11061, - "maternal": 11062, - "gerard": 11063, - "denis": 11064, - "stealing": 11065, - "##load": 11066, - "genius": 11067, - "duchy": 11068, - "##oria": 11069, - "inaugurated": 11070, - "momentum": 11071, - "suits": 11072, - "placement": 11073, - "sovereign": 11074, - "clause": 11075, - "thames": 11076, - "##hara": 11077, - "confederation": 11078, - "reservation": 11079, - "sketch": 11080, - "yankees": 11081, - "lets": 11082, - "rotten": 11083, - "charm": 11084, - "hal": 11085, - "verses": 11086, - "ultra": 11087, - "commercially": 11088, - "dot": 11089, - "salon": 11090, - "citation": 11091, - "adopt": 11092, - "winnipeg": 11093, - "mist": 11094, - "allocated": 11095, - "cairo": 11096, - "##boy": 11097, - "jenkins": 11098, - "interference": 11099, - "objectives": 11100, - "##wind": 11101, - "1820": 11102, - "portfolio": 11103, - "armoured": 11104, - "sectors": 11105, - "##eh": 11106, - "initiatives": 11107, - "##world": 11108, - "integrity": 11109, - "exercises": 11110, - "robe": 11111, - "tap": 11112, - "ab": 11113, - "gazed": 11114, - "##tones": 11115, - "distracted": 11116, - "rulers": 11117, - "111": 11118, - "favorable": 11119, - "jerome": 11120, - "tended": 11121, - "cart": 11122, - "factories": 11123, - "##eri": 11124, - "diplomat": 11125, - "valued": 11126, - "gravel": 11127, - "charitable": 11128, - "##try": 11129, - "calvin": 11130, - "exploring": 11131, - "chang": 11132, - "shepherd": 11133, - "terrace": 11134, - "pdf": 11135, - "pupil": 11136, - "##ural": 11137, - "reflects": 11138, - "ups": 
11139, - "##rch": 11140, - "governors": 11141, - "shelf": 11142, - "depths": 11143, - "##nberg": 11144, - "trailed": 11145, - "crest": 11146, - "tackle": 11147, - "##nian": 11148, - "##ats": 11149, - "hatred": 11150, - "##kai": 11151, - "clare": 11152, - "makers": 11153, - "ethiopia": 11154, - "longtime": 11155, - "detected": 11156, - "embedded": 11157, - "lacking": 11158, - "slapped": 11159, - "rely": 11160, - "thomson": 11161, - "anticipation": 11162, - "iso": 11163, - "morton": 11164, - "successive": 11165, - "agnes": 11166, - "screenwriter": 11167, - "straightened": 11168, - "philippe": 11169, - "playwright": 11170, - "haunted": 11171, - "licence": 11172, - "iris": 11173, - "intentions": 11174, - "sutton": 11175, - "112": 11176, - "logical": 11177, - "correctly": 11178, - "##weight": 11179, - "branded": 11180, - "licked": 11181, - "tipped": 11182, - "silva": 11183, - "ricky": 11184, - "narrator": 11185, - "requests": 11186, - "##ents": 11187, - "greeted": 11188, - "supernatural": 11189, - "cow": 11190, - "##wald": 11191, - "lung": 11192, - "refusing": 11193, - "employer": 11194, - "strait": 11195, - "gaelic": 11196, - "liner": 11197, - "##piece": 11198, - "zoe": 11199, - "sabha": 11200, - "##mba": 11201, - "driveway": 11202, - "harvest": 11203, - "prints": 11204, - "bates": 11205, - "reluctantly": 11206, - "threshold": 11207, - "algebra": 11208, - "ira": 11209, - "wherever": 11210, - "coupled": 11211, - "240": 11212, - "assumption": 11213, - "picks": 11214, - "##air": 11215, - "designers": 11216, - "raids": 11217, - "gentlemen": 11218, - "##ean": 11219, - "roller": 11220, - "blowing": 11221, - "leipzig": 11222, - "locks": 11223, - "screw": 11224, - "dressing": 11225, - "strand": 11226, - "##lings": 11227, - "scar": 11228, - "dwarf": 11229, - "depicts": 11230, - "##nu": 11231, - "nods": 11232, - "##mine": 11233, - "differ": 11234, - "boris": 11235, - "##eur": 11236, - "yuan": 11237, - "flip": 11238, - "##gie": 11239, - "mob": 11240, - "invested": 11241, - "questioning": 11242, - "applying": 11243, - "##ture": 11244, - "shout": 11245, - "##sel": 11246, - "gameplay": 11247, - "blamed": 11248, - "illustrations": 11249, - "bothered": 11250, - "weakness": 11251, - "rehabilitation": 11252, - "##of": 11253, - "##zes": 11254, - "envelope": 11255, - "rumors": 11256, - "miners": 11257, - "leicester": 11258, - "subtle": 11259, - "kerry": 11260, - "##ico": 11261, - "ferguson": 11262, - "##fu": 11263, - "premiership": 11264, - "ne": 11265, - "##cat": 11266, - "bengali": 11267, - "prof": 11268, - "catches": 11269, - "remnants": 11270, - "dana": 11271, - "##rily": 11272, - "shouting": 11273, - "presidents": 11274, - "baltic": 11275, - "ought": 11276, - "ghosts": 11277, - "dances": 11278, - "sailors": 11279, - "shirley": 11280, - "fancy": 11281, - "dominic": 11282, - "##bie": 11283, - "madonna": 11284, - "##rick": 11285, - "bark": 11286, - "buttons": 11287, - "gymnasium": 11288, - "ashes": 11289, - "liver": 11290, - "toby": 11291, - "oath": 11292, - "providence": 11293, - "doyle": 11294, - "evangelical": 11295, - "nixon": 11296, - "cement": 11297, - "carnegie": 11298, - "embarked": 11299, - "hatch": 11300, - "surroundings": 11301, - "guarantee": 11302, - "needing": 11303, - "pirate": 11304, - "essence": 11305, - "##bee": 11306, - "filter": 11307, - "crane": 11308, - "hammond": 11309, - "projected": 11310, - "immune": 11311, - "percy": 11312, - "twelfth": 11313, - "##ult": 11314, - "regent": 11315, - "doctoral": 11316, - "damon": 11317, - "mikhail": 11318, - "##ichi": 11319, - "lu": 11320, - 
"critically": 11321, - "elect": 11322, - "realised": 11323, - "abortion": 11324, - "acute": 11325, - "screening": 11326, - "mythology": 11327, - "steadily": 11328, - "##fc": 11329, - "frown": 11330, - "nottingham": 11331, - "kirk": 11332, - "wa": 11333, - "minneapolis": 11334, - "##rra": 11335, - "module": 11336, - "algeria": 11337, - "mc": 11338, - "nautical": 11339, - "encounters": 11340, - "surprising": 11341, - "statues": 11342, - "availability": 11343, - "shirts": 11344, - "pie": 11345, - "alma": 11346, - "brows": 11347, - "munster": 11348, - "mack": 11349, - "soup": 11350, - "crater": 11351, - "tornado": 11352, - "sanskrit": 11353, - "cedar": 11354, - "explosive": 11355, - "bordered": 11356, - "dixon": 11357, - "planets": 11358, - "stamp": 11359, - "exam": 11360, - "happily": 11361, - "##bble": 11362, - "carriers": 11363, - "kidnapped": 11364, - "##vis": 11365, - "accommodation": 11366, - "emigrated": 11367, - "##met": 11368, - "knockout": 11369, - "correspondent": 11370, - "violation": 11371, - "profits": 11372, - "peaks": 11373, - "lang": 11374, - "specimen": 11375, - "agenda": 11376, - "ancestry": 11377, - "pottery": 11378, - "spelling": 11379, - "equations": 11380, - "obtaining": 11381, - "ki": 11382, - "linking": 11383, - "1825": 11384, - "debris": 11385, - "asylum": 11386, - "##20": 11387, - "buddhism": 11388, - "teddy": 11389, - "##ants": 11390, - "gazette": 11391, - "##nger": 11392, - "##sse": 11393, - "dental": 11394, - "eligibility": 11395, - "utc": 11396, - "fathers": 11397, - "averaged": 11398, - "zimbabwe": 11399, - "francesco": 11400, - "coloured": 11401, - "hissed": 11402, - "translator": 11403, - "lynch": 11404, - "mandate": 11405, - "humanities": 11406, - "mackenzie": 11407, - "uniforms": 11408, - "lin": 11409, - "##iana": 11410, - "##gio": 11411, - "asset": 11412, - "mhz": 11413, - "fitting": 11414, - "samantha": 11415, - "genera": 11416, - "wei": 11417, - "rim": 11418, - "beloved": 11419, - "shark": 11420, - "riot": 11421, - "entities": 11422, - "expressions": 11423, - "indo": 11424, - "carmen": 11425, - "slipping": 11426, - "owing": 11427, - "abbot": 11428, - "neighbor": 11429, - "sidney": 11430, - "##av": 11431, - "rats": 11432, - "recommendations": 11433, - "encouraging": 11434, - "squadrons": 11435, - "anticipated": 11436, - "commanders": 11437, - "conquered": 11438, - "##oto": 11439, - "donations": 11440, - "diagnosed": 11441, - "##mond": 11442, - "divide": 11443, - "##iva": 11444, - "guessed": 11445, - "decoration": 11446, - "vernon": 11447, - "auditorium": 11448, - "revelation": 11449, - "conversations": 11450, - "##kers": 11451, - "##power": 11452, - "herzegovina": 11453, - "dash": 11454, - "alike": 11455, - "protested": 11456, - "lateral": 11457, - "herman": 11458, - "accredited": 11459, - "mg": 11460, - "##gent": 11461, - "freeman": 11462, - "mel": 11463, - "fiji": 11464, - "crow": 11465, - "crimson": 11466, - "##rine": 11467, - "livestock": 11468, - "##pped": 11469, - "humanitarian": 11470, - "bored": 11471, - "oz": 11472, - "whip": 11473, - "##lene": 11474, - "##ali": 11475, - "legitimate": 11476, - "alter": 11477, - "grinning": 11478, - "spelled": 11479, - "anxious": 11480, - "oriental": 11481, - "wesley": 11482, - "##nin": 11483, - "##hole": 11484, - "carnival": 11485, - "controller": 11486, - "detect": 11487, - "##ssa": 11488, - "bowed": 11489, - "educator": 11490, - "kosovo": 11491, - "macedonia": 11492, - "##sin": 11493, - "occupy": 11494, - "mastering": 11495, - "stephanie": 11496, - "janeiro": 11497, - "para": 11498, - "unaware": 11499, - 
"nurses": 11500, - "noon": 11501, - "135": 11502, - "cam": 11503, - "hopefully": 11504, - "ranger": 11505, - "combine": 11506, - "sociology": 11507, - "polar": 11508, - "rica": 11509, - "##eer": 11510, - "neill": 11511, - "##sman": 11512, - "holocaust": 11513, - "##ip": 11514, - "doubled": 11515, - "lust": 11516, - "1828": 11517, - "109": 11518, - "decent": 11519, - "cooling": 11520, - "unveiled": 11521, - "##card": 11522, - "1829": 11523, - "nsw": 11524, - "homer": 11525, - "chapman": 11526, - "meyer": 11527, - "##gin": 11528, - "dive": 11529, - "mae": 11530, - "reagan": 11531, - "expertise": 11532, - "##gled": 11533, - "darwin": 11534, - "brooke": 11535, - "sided": 11536, - "prosecution": 11537, - "investigating": 11538, - "comprised": 11539, - "petroleum": 11540, - "genres": 11541, - "reluctant": 11542, - "differently": 11543, - "trilogy": 11544, - "johns": 11545, - "vegetables": 11546, - "corpse": 11547, - "highlighted": 11548, - "lounge": 11549, - "pension": 11550, - "unsuccessfully": 11551, - "elegant": 11552, - "aided": 11553, - "ivory": 11554, - "beatles": 11555, - "amelia": 11556, - "cain": 11557, - "dubai": 11558, - "sunny": 11559, - "immigrant": 11560, - "babe": 11561, - "click": 11562, - "##nder": 11563, - "underwater": 11564, - "pepper": 11565, - "combining": 11566, - "mumbled": 11567, - "atlas": 11568, - "horns": 11569, - "accessed": 11570, - "ballad": 11571, - "physicians": 11572, - "homeless": 11573, - "gestured": 11574, - "rpm": 11575, - "freak": 11576, - "louisville": 11577, - "corporations": 11578, - "patriots": 11579, - "prizes": 11580, - "rational": 11581, - "warn": 11582, - "modes": 11583, - "decorative": 11584, - "overnight": 11585, - "din": 11586, - "troubled": 11587, - "phantom": 11588, - "##ort": 11589, - "monarch": 11590, - "sheer": 11591, - "##dorf": 11592, - "generals": 11593, - "guidelines": 11594, - "organs": 11595, - "addresses": 11596, - "##zon": 11597, - "enhance": 11598, - "curling": 11599, - "parishes": 11600, - "cord": 11601, - "##kie": 11602, - "linux": 11603, - "caesar": 11604, - "deutsche": 11605, - "bavaria": 11606, - "##bia": 11607, - "coleman": 11608, - "cyclone": 11609, - "##eria": 11610, - "bacon": 11611, - "petty": 11612, - "##yama": 11613, - "##old": 11614, - "hampton": 11615, - "diagnosis": 11616, - "1824": 11617, - "throws": 11618, - "complexity": 11619, - "rita": 11620, - "disputed": 11621, - "##₃": 11622, - "pablo": 11623, - "##sch": 11624, - "marketed": 11625, - "trafficking": 11626, - "##ulus": 11627, - "examine": 11628, - "plague": 11629, - "formats": 11630, - "##oh": 11631, - "vault": 11632, - "faithful": 11633, - "##bourne": 11634, - "webster": 11635, - "##ox": 11636, - "highlights": 11637, - "##ient": 11638, - "##ann": 11639, - "phones": 11640, - "vacuum": 11641, - "sandwich": 11642, - "modeling": 11643, - "##gated": 11644, - "bolivia": 11645, - "clergy": 11646, - "qualities": 11647, - "isabel": 11648, - "##nas": 11649, - "##ars": 11650, - "wears": 11651, - "screams": 11652, - "reunited": 11653, - "annoyed": 11654, - "bra": 11655, - "##ancy": 11656, - "##rate": 11657, - "differential": 11658, - "transmitter": 11659, - "tattoo": 11660, - "container": 11661, - "poker": 11662, - "##och": 11663, - "excessive": 11664, - "resides": 11665, - "cowboys": 11666, - "##tum": 11667, - "augustus": 11668, - "trash": 11669, - "providers": 11670, - "statute": 11671, - "retreated": 11672, - "balcony": 11673, - "reversed": 11674, - "void": 11675, - "storey": 11676, - "preceded": 11677, - "masses": 11678, - "leap": 11679, - "laughs": 11680, - 
"neighborhoods": 11681, - "wards": 11682, - "schemes": 11683, - "falcon": 11684, - "santo": 11685, - "battlefield": 11686, - "pad": 11687, - "ronnie": 11688, - "thread": 11689, - "lesbian": 11690, - "venus": 11691, - "##dian": 11692, - "beg": 11693, - "sandstone": 11694, - "daylight": 11695, - "punched": 11696, - "gwen": 11697, - "analog": 11698, - "stroked": 11699, - "wwe": 11700, - "acceptable": 11701, - "measurements": 11702, - "dec": 11703, - "toxic": 11704, - "##kel": 11705, - "adequate": 11706, - "surgical": 11707, - "economist": 11708, - "parameters": 11709, - "varsity": 11710, - "##sberg": 11711, - "quantity": 11712, - "ella": 11713, - "##chy": 11714, - "##rton": 11715, - "countess": 11716, - "generating": 11717, - "precision": 11718, - "diamonds": 11719, - "expressway": 11720, - "ga": 11721, - "##ı": 11722, - "1821": 11723, - "uruguay": 11724, - "talents": 11725, - "galleries": 11726, - "expenses": 11727, - "scanned": 11728, - "colleague": 11729, - "outlets": 11730, - "ryder": 11731, - "lucien": 11732, - "##ila": 11733, - "paramount": 11734, - "##bon": 11735, - "syracuse": 11736, - "dim": 11737, - "fangs": 11738, - "gown": 11739, - "sweep": 11740, - "##sie": 11741, - "toyota": 11742, - "missionaries": 11743, - "websites": 11744, - "##nsis": 11745, - "sentences": 11746, - "adviser": 11747, - "val": 11748, - "trademark": 11749, - "spells": 11750, - "##plane": 11751, - "patience": 11752, - "starter": 11753, - "slim": 11754, - "##borg": 11755, - "toe": 11756, - "incredibly": 11757, - "shoots": 11758, - "elliot": 11759, - "nobility": 11760, - "##wyn": 11761, - "cowboy": 11762, - "endorsed": 11763, - "gardner": 11764, - "tendency": 11765, - "persuaded": 11766, - "organisms": 11767, - "emissions": 11768, - "kazakhstan": 11769, - "amused": 11770, - "boring": 11771, - "chips": 11772, - "themed": 11773, - "##hand": 11774, - "llc": 11775, - "constantinople": 11776, - "chasing": 11777, - "systematic": 11778, - "guatemala": 11779, - "borrowed": 11780, - "erin": 11781, - "carey": 11782, - "##hard": 11783, - "highlands": 11784, - "struggles": 11785, - "1810": 11786, - "##ifying": 11787, - "##ced": 11788, - "wong": 11789, - "exceptions": 11790, - "develops": 11791, - "enlarged": 11792, - "kindergarten": 11793, - "castro": 11794, - "##ern": 11795, - "##rina": 11796, - "leigh": 11797, - "zombie": 11798, - "juvenile": 11799, - "##most": 11800, - "consul": 11801, - "##nar": 11802, - "sailor": 11803, - "hyde": 11804, - "clarence": 11805, - "intensive": 11806, - "pinned": 11807, - "nasty": 11808, - "useless": 11809, - "jung": 11810, - "clayton": 11811, - "stuffed": 11812, - "exceptional": 11813, - "ix": 11814, - "apostolic": 11815, - "230": 11816, - "transactions": 11817, - "##dge": 11818, - "exempt": 11819, - "swinging": 11820, - "cove": 11821, - "religions": 11822, - "##ash": 11823, - "shields": 11824, - "dairy": 11825, - "bypass": 11826, - "190": 11827, - "pursuing": 11828, - "bug": 11829, - "joyce": 11830, - "bombay": 11831, - "chassis": 11832, - "southampton": 11833, - "chat": 11834, - "interact": 11835, - "redesignated": 11836, - "##pen": 11837, - "nascar": 11838, - "pray": 11839, - "salmon": 11840, - "rigid": 11841, - "regained": 11842, - "malaysian": 11843, - "grim": 11844, - "publicity": 11845, - "constituted": 11846, - "capturing": 11847, - "toilet": 11848, - "delegate": 11849, - "purely": 11850, - "tray": 11851, - "drift": 11852, - "loosely": 11853, - "striker": 11854, - "weakened": 11855, - "trinidad": 11856, - "mitch": 11857, - "itv": 11858, - "defines": 11859, - "transmitted": 11860, - 
"ming": 11861, - "scarlet": 11862, - "nodding": 11863, - "fitzgerald": 11864, - "fu": 11865, - "narrowly": 11866, - "sp": 11867, - "tooth": 11868, - "standings": 11869, - "virtue": 11870, - "##₁": 11871, - "##wara": 11872, - "##cting": 11873, - "chateau": 11874, - "gloves": 11875, - "lid": 11876, - "##nel": 11877, - "hurting": 11878, - "conservatory": 11879, - "##pel": 11880, - "sinclair": 11881, - "reopened": 11882, - "sympathy": 11883, - "nigerian": 11884, - "strode": 11885, - "advocated": 11886, - "optional": 11887, - "chronic": 11888, - "discharge": 11889, - "##rc": 11890, - "suck": 11891, - "compatible": 11892, - "laurel": 11893, - "stella": 11894, - "shi": 11895, - "fails": 11896, - "wage": 11897, - "dodge": 11898, - "128": 11899, - "informal": 11900, - "sorts": 11901, - "levi": 11902, - "buddha": 11903, - "villagers": 11904, - "##aka": 11905, - "chronicles": 11906, - "heavier": 11907, - "summoned": 11908, - "gateway": 11909, - "3000": 11910, - "eleventh": 11911, - "jewelry": 11912, - "translations": 11913, - "accordingly": 11914, - "seas": 11915, - "##ency": 11916, - "fiber": 11917, - "pyramid": 11918, - "cubic": 11919, - "dragging": 11920, - "##ista": 11921, - "caring": 11922, - "##ops": 11923, - "android": 11924, - "contacted": 11925, - "lunar": 11926, - "##dt": 11927, - "kai": 11928, - "lisbon": 11929, - "patted": 11930, - "1826": 11931, - "sacramento": 11932, - "theft": 11933, - "madagascar": 11934, - "subtropical": 11935, - "disputes": 11936, - "ta": 11937, - "holidays": 11938, - "piper": 11939, - "willow": 11940, - "mare": 11941, - "cane": 11942, - "itunes": 11943, - "newfoundland": 11944, - "benny": 11945, - "companions": 11946, - "dong": 11947, - "raj": 11948, - "observe": 11949, - "roar": 11950, - "charming": 11951, - "plaque": 11952, - "tibetan": 11953, - "fossils": 11954, - "enacted": 11955, - "manning": 11956, - "bubble": 11957, - "tina": 11958, - "tanzania": 11959, - "##eda": 11960, - "##hir": 11961, - "funk": 11962, - "swamp": 11963, - "deputies": 11964, - "cloak": 11965, - "ufc": 11966, - "scenario": 11967, - "par": 11968, - "scratch": 11969, - "metals": 11970, - "anthem": 11971, - "guru": 11972, - "engaging": 11973, - "specially": 11974, - "##boat": 11975, - "dialects": 11976, - "nineteen": 11977, - "cecil": 11978, - "duet": 11979, - "disability": 11980, - "messenger": 11981, - "unofficial": 11982, - "##lies": 11983, - "defunct": 11984, - "eds": 11985, - "moonlight": 11986, - "drainage": 11987, - "surname": 11988, - "puzzle": 11989, - "honda": 11990, - "switching": 11991, - "conservatives": 11992, - "mammals": 11993, - "knox": 11994, - "broadcaster": 11995, - "sidewalk": 11996, - "cope": 11997, - "##ried": 11998, - "benson": 11999, - "princes": 12000, - "peterson": 12001, - "##sal": 12002, - "bedford": 12003, - "sharks": 12004, - "eli": 12005, - "wreck": 12006, - "alberto": 12007, - "gasp": 12008, - "archaeology": 12009, - "lgbt": 12010, - "teaches": 12011, - "securities": 12012, - "madness": 12013, - "compromise": 12014, - "waving": 12015, - "coordination": 12016, - "davidson": 12017, - "visions": 12018, - "leased": 12019, - "possibilities": 12020, - "eighty": 12021, - "jun": 12022, - "fernandez": 12023, - "enthusiasm": 12024, - "assassin": 12025, - "sponsorship": 12026, - "reviewer": 12027, - "kingdoms": 12028, - "estonian": 12029, - "laboratories": 12030, - "##fy": 12031, - "##nal": 12032, - "applies": 12033, - "verb": 12034, - "celebrations": 12035, - "##zzo": 12036, - "rowing": 12037, - "lightweight": 12038, - "sadness": 12039, - "submit": 12040, - "mvp": 12041, 
- "balanced": 12042, - "dude": 12043, - "##vas": 12044, - "explicitly": 12045, - "metric": 12046, - "magnificent": 12047, - "mound": 12048, - "brett": 12049, - "mohammad": 12050, - "mistakes": 12051, - "irregular": 12052, - "##hing": 12053, - "##ass": 12054, - "sanders": 12055, - "betrayed": 12056, - "shipped": 12057, - "surge": 12058, - "##enburg": 12059, - "reporters": 12060, - "termed": 12061, - "georg": 12062, - "pity": 12063, - "verbal": 12064, - "bulls": 12065, - "abbreviated": 12066, - "enabling": 12067, - "appealed": 12068, - "##are": 12069, - "##atic": 12070, - "sicily": 12071, - "sting": 12072, - "heel": 12073, - "sweetheart": 12074, - "bart": 12075, - "spacecraft": 12076, - "brutal": 12077, - "monarchy": 12078, - "##tter": 12079, - "aberdeen": 12080, - "cameo": 12081, - "diane": 12082, - "##ub": 12083, - "survivor": 12084, - "clyde": 12085, - "##aries": 12086, - "complaint": 12087, - "##makers": 12088, - "clarinet": 12089, - "delicious": 12090, - "chilean": 12091, - "karnataka": 12092, - "coordinates": 12093, - "1818": 12094, - "panties": 12095, - "##rst": 12096, - "pretending": 12097, - "ar": 12098, - "dramatically": 12099, - "kiev": 12100, - "bella": 12101, - "tends": 12102, - "distances": 12103, - "113": 12104, - "catalog": 12105, - "launching": 12106, - "instances": 12107, - "telecommunications": 12108, - "portable": 12109, - "lindsay": 12110, - "vatican": 12111, - "##eim": 12112, - "angles": 12113, - "aliens": 12114, - "marker": 12115, - "stint": 12116, - "screens": 12117, - "bolton": 12118, - "##rne": 12119, - "judy": 12120, - "wool": 12121, - "benedict": 12122, - "plasma": 12123, - "europa": 12124, - "spark": 12125, - "imaging": 12126, - "filmmaker": 12127, - "swiftly": 12128, - "##een": 12129, - "contributor": 12130, - "##nor": 12131, - "opted": 12132, - "stamps": 12133, - "apologize": 12134, - "financing": 12135, - "butter": 12136, - "gideon": 12137, - "sophisticated": 12138, - "alignment": 12139, - "avery": 12140, - "chemicals": 12141, - "yearly": 12142, - "speculation": 12143, - "prominence": 12144, - "professionally": 12145, - "##ils": 12146, - "immortal": 12147, - "institutional": 12148, - "inception": 12149, - "wrists": 12150, - "identifying": 12151, - "tribunal": 12152, - "derives": 12153, - "gains": 12154, - "##wo": 12155, - "papal": 12156, - "preference": 12157, - "linguistic": 12158, - "vince": 12159, - "operative": 12160, - "brewery": 12161, - "##ont": 12162, - "unemployment": 12163, - "boyd": 12164, - "##ured": 12165, - "##outs": 12166, - "albeit": 12167, - "prophet": 12168, - "1813": 12169, - "bi": 12170, - "##rr": 12171, - "##face": 12172, - "##rad": 12173, - "quarterly": 12174, - "asteroid": 12175, - "cleaned": 12176, - "radius": 12177, - "temper": 12178, - "##llen": 12179, - "telugu": 12180, - "jerk": 12181, - "viscount": 12182, - "menu": 12183, - "##ote": 12184, - "glimpse": 12185, - "##aya": 12186, - "yacht": 12187, - "hawaiian": 12188, - "baden": 12189, - "##rl": 12190, - "laptop": 12191, - "readily": 12192, - "##gu": 12193, - "monetary": 12194, - "offshore": 12195, - "scots": 12196, - "watches": 12197, - "##yang": 12198, - "##arian": 12199, - "upgrade": 12200, - "needle": 12201, - "xbox": 12202, - "lea": 12203, - "encyclopedia": 12204, - "flank": 12205, - "fingertips": 12206, - "##pus": 12207, - "delight": 12208, - "teachings": 12209, - "confirm": 12210, - "roth": 12211, - "beaches": 12212, - "midway": 12213, - "winters": 12214, - "##iah": 12215, - "teasing": 12216, - "daytime": 12217, - "beverly": 12218, - "gambling": 12219, - "bonnie": 12220, - 
"##backs": 12221, - "regulated": 12222, - "clement": 12223, - "hermann": 12224, - "tricks": 12225, - "knot": 12226, - "##shing": 12227, - "##uring": 12228, - "##vre": 12229, - "detached": 12230, - "ecological": 12231, - "owed": 12232, - "specialty": 12233, - "byron": 12234, - "inventor": 12235, - "bats": 12236, - "stays": 12237, - "screened": 12238, - "unesco": 12239, - "midland": 12240, - "trim": 12241, - "affection": 12242, - "##ander": 12243, - "##rry": 12244, - "jess": 12245, - "thoroughly": 12246, - "feedback": 12247, - "##uma": 12248, - "chennai": 12249, - "strained": 12250, - "heartbeat": 12251, - "wrapping": 12252, - "overtime": 12253, - "pleaded": 12254, - "##sworth": 12255, - "mon": 12256, - "leisure": 12257, - "oclc": 12258, - "##tate": 12259, - "##ele": 12260, - "feathers": 12261, - "angelo": 12262, - "thirds": 12263, - "nuts": 12264, - "surveys": 12265, - "clever": 12266, - "gill": 12267, - "commentator": 12268, - "##dos": 12269, - "darren": 12270, - "rides": 12271, - "gibraltar": 12272, - "##nc": 12273, - "##mu": 12274, - "dissolution": 12275, - "dedication": 12276, - "shin": 12277, - "meals": 12278, - "saddle": 12279, - "elvis": 12280, - "reds": 12281, - "chaired": 12282, - "taller": 12283, - "appreciation": 12284, - "functioning": 12285, - "niece": 12286, - "favored": 12287, - "advocacy": 12288, - "robbie": 12289, - "criminals": 12290, - "suffolk": 12291, - "yugoslav": 12292, - "passport": 12293, - "constable": 12294, - "congressman": 12295, - "hastings": 12296, - "vera": 12297, - "##rov": 12298, - "consecrated": 12299, - "sparks": 12300, - "ecclesiastical": 12301, - "confined": 12302, - "##ovich": 12303, - "muller": 12304, - "floyd": 12305, - "nora": 12306, - "1822": 12307, - "paved": 12308, - "1827": 12309, - "cumberland": 12310, - "ned": 12311, - "saga": 12312, - "spiral": 12313, - "##flow": 12314, - "appreciated": 12315, - "yi": 12316, - "collaborative": 12317, - "treating": 12318, - "similarities": 12319, - "feminine": 12320, - "finishes": 12321, - "##ib": 12322, - "jade": 12323, - "import": 12324, - "##nse": 12325, - "##hot": 12326, - "champagne": 12327, - "mice": 12328, - "securing": 12329, - "celebrities": 12330, - "helsinki": 12331, - "attributes": 12332, - "##gos": 12333, - "cousins": 12334, - "phases": 12335, - "ache": 12336, - "lucia": 12337, - "gandhi": 12338, - "submission": 12339, - "vicar": 12340, - "spear": 12341, - "shine": 12342, - "tasmania": 12343, - "biting": 12344, - "detention": 12345, - "constitute": 12346, - "tighter": 12347, - "seasonal": 12348, - "##gus": 12349, - "terrestrial": 12350, - "matthews": 12351, - "##oka": 12352, - "effectiveness": 12353, - "parody": 12354, - "philharmonic": 12355, - "##onic": 12356, - "1816": 12357, - "strangers": 12358, - "encoded": 12359, - "consortium": 12360, - "guaranteed": 12361, - "regards": 12362, - "shifts": 12363, - "tortured": 12364, - "collision": 12365, - "supervisor": 12366, - "inform": 12367, - "broader": 12368, - "insight": 12369, - "theaters": 12370, - "armour": 12371, - "emeritus": 12372, - "blink": 12373, - "incorporates": 12374, - "mapping": 12375, - "##50": 12376, - "##ein": 12377, - "handball": 12378, - "flexible": 12379, - "##nta": 12380, - "substantially": 12381, - "generous": 12382, - "thief": 12383, - "##own": 12384, - "carr": 12385, - "loses": 12386, - "1793": 12387, - "prose": 12388, - "ucla": 12389, - "romeo": 12390, - "generic": 12391, - "metallic": 12392, - "realization": 12393, - "damages": 12394, - "mk": 12395, - "commissioners": 12396, - "zach": 12397, - "default": 12398, - "##ther": 
12399, - "helicopters": 12400, - "lengthy": 12401, - "stems": 12402, - "spa": 12403, - "partnered": 12404, - "spectators": 12405, - "rogue": 12406, - "indication": 12407, - "penalties": 12408, - "teresa": 12409, - "1801": 12410, - "sen": 12411, - "##tric": 12412, - "dalton": 12413, - "##wich": 12414, - "irving": 12415, - "photographic": 12416, - "##vey": 12417, - "dell": 12418, - "deaf": 12419, - "peters": 12420, - "excluded": 12421, - "unsure": 12422, - "##vable": 12423, - "patterson": 12424, - "crawled": 12425, - "##zio": 12426, - "resided": 12427, - "whipped": 12428, - "latvia": 12429, - "slower": 12430, - "ecole": 12431, - "pipes": 12432, - "employers": 12433, - "maharashtra": 12434, - "comparable": 12435, - "va": 12436, - "textile": 12437, - "pageant": 12438, - "##gel": 12439, - "alphabet": 12440, - "binary": 12441, - "irrigation": 12442, - "chartered": 12443, - "choked": 12444, - "antoine": 12445, - "offs": 12446, - "waking": 12447, - "supplement": 12448, - "##wen": 12449, - "quantities": 12450, - "demolition": 12451, - "regain": 12452, - "locate": 12453, - "urdu": 12454, - "folks": 12455, - "alt": 12456, - "114": 12457, - "##mc": 12458, - "scary": 12459, - "andreas": 12460, - "whites": 12461, - "##ava": 12462, - "classrooms": 12463, - "mw": 12464, - "aesthetic": 12465, - "publishes": 12466, - "valleys": 12467, - "guides": 12468, - "cubs": 12469, - "johannes": 12470, - "bryant": 12471, - "conventions": 12472, - "affecting": 12473, - "##itt": 12474, - "drain": 12475, - "awesome": 12476, - "isolation": 12477, - "prosecutor": 12478, - "ambitious": 12479, - "apology": 12480, - "captive": 12481, - "downs": 12482, - "atmospheric": 12483, - "lorenzo": 12484, - "aisle": 12485, - "beef": 12486, - "foul": 12487, - "##onia": 12488, - "kidding": 12489, - "composite": 12490, - "disturbed": 12491, - "illusion": 12492, - "natives": 12493, - "##ffer": 12494, - "emi": 12495, - "rockets": 12496, - "riverside": 12497, - "wartime": 12498, - "painters": 12499, - "adolf": 12500, - "melted": 12501, - "##ail": 12502, - "uncertainty": 12503, - "simulation": 12504, - "hawks": 12505, - "progressed": 12506, - "meantime": 12507, - "builder": 12508, - "spray": 12509, - "breach": 12510, - "unhappy": 12511, - "regina": 12512, - "russians": 12513, - "##urg": 12514, - "determining": 12515, - "##tation": 12516, - "tram": 12517, - "1806": 12518, - "##quin": 12519, - "aging": 12520, - "##12": 12521, - "1823": 12522, - "garion": 12523, - "rented": 12524, - "mister": 12525, - "diaz": 12526, - "terminated": 12527, - "clip": 12528, - "1817": 12529, - "depend": 12530, - "nervously": 12531, - "disco": 12532, - "owe": 12533, - "defenders": 12534, - "shiva": 12535, - "notorious": 12536, - "disbelief": 12537, - "shiny": 12538, - "worcester": 12539, - "##gation": 12540, - "##yr": 12541, - "trailing": 12542, - "undertook": 12543, - "islander": 12544, - "belarus": 12545, - "limitations": 12546, - "watershed": 12547, - "fuller": 12548, - "overlooking": 12549, - "utilized": 12550, - "raphael": 12551, - "1819": 12552, - "synthetic": 12553, - "breakdown": 12554, - "klein": 12555, - "##nate": 12556, - "moaned": 12557, - "memoir": 12558, - "lamb": 12559, - "practicing": 12560, - "##erly": 12561, - "cellular": 12562, - "arrows": 12563, - "exotic": 12564, - "##graphy": 12565, - "witches": 12566, - "117": 12567, - "charted": 12568, - "rey": 12569, - "hut": 12570, - "hierarchy": 12571, - "subdivision": 12572, - "freshwater": 12573, - "giuseppe": 12574, - "aloud": 12575, - "reyes": 12576, - "qatar": 12577, - "marty": 12578, - "sideways": 
12579, - "utterly": 12580, - "sexually": 12581, - "jude": 12582, - "prayers": 12583, - "mccarthy": 12584, - "softball": 12585, - "blend": 12586, - "damien": 12587, - "##gging": 12588, - "##metric": 12589, - "wholly": 12590, - "erupted": 12591, - "lebanese": 12592, - "negro": 12593, - "revenues": 12594, - "tasted": 12595, - "comparative": 12596, - "teamed": 12597, - "transaction": 12598, - "labeled": 12599, - "maori": 12600, - "sovereignty": 12601, - "parkway": 12602, - "trauma": 12603, - "gran": 12604, - "malay": 12605, - "121": 12606, - "advancement": 12607, - "descendant": 12608, - "2020": 12609, - "buzz": 12610, - "salvation": 12611, - "inventory": 12612, - "symbolic": 12613, - "##making": 12614, - "antarctica": 12615, - "mps": 12616, - "##gas": 12617, - "##bro": 12618, - "mohammed": 12619, - "myanmar": 12620, - "holt": 12621, - "submarines": 12622, - "tones": 12623, - "##lman": 12624, - "locker": 12625, - "patriarch": 12626, - "bangkok": 12627, - "emerson": 12628, - "remarks": 12629, - "predators": 12630, - "kin": 12631, - "afghan": 12632, - "confession": 12633, - "norwich": 12634, - "rental": 12635, - "emerge": 12636, - "advantages": 12637, - "##zel": 12638, - "rca": 12639, - "##hold": 12640, - "shortened": 12641, - "storms": 12642, - "aidan": 12643, - "##matic": 12644, - "autonomy": 12645, - "compliance": 12646, - "##quet": 12647, - "dudley": 12648, - "atp": 12649, - "##osis": 12650, - "1803": 12651, - "motto": 12652, - "documentation": 12653, - "summary": 12654, - "professors": 12655, - "spectacular": 12656, - "christina": 12657, - "archdiocese": 12658, - "flashing": 12659, - "innocence": 12660, - "remake": 12661, - "##dell": 12662, - "psychic": 12663, - "reef": 12664, - "scare": 12665, - "employ": 12666, - "rs": 12667, - "sticks": 12668, - "meg": 12669, - "gus": 12670, - "leans": 12671, - "##ude": 12672, - "accompany": 12673, - "bergen": 12674, - "tomas": 12675, - "##iko": 12676, - "doom": 12677, - "wages": 12678, - "pools": 12679, - "##nch": 12680, - "##bes": 12681, - "breasts": 12682, - "scholarly": 12683, - "alison": 12684, - "outline": 12685, - "brittany": 12686, - "breakthrough": 12687, - "willis": 12688, - "realistic": 12689, - "##cut": 12690, - "##boro": 12691, - "competitor": 12692, - "##stan": 12693, - "pike": 12694, - "picnic": 12695, - "icon": 12696, - "designing": 12697, - "commercials": 12698, - "washing": 12699, - "villain": 12700, - "skiing": 12701, - "micro": 12702, - "costumes": 12703, - "auburn": 12704, - "halted": 12705, - "executives": 12706, - "##hat": 12707, - "logistics": 12708, - "cycles": 12709, - "vowel": 12710, - "applicable": 12711, - "barrett": 12712, - "exclaimed": 12713, - "eurovision": 12714, - "eternity": 12715, - "ramon": 12716, - "##umi": 12717, - "##lls": 12718, - "modifications": 12719, - "sweeping": 12720, - "disgust": 12721, - "##uck": 12722, - "torch": 12723, - "aviv": 12724, - "ensuring": 12725, - "rude": 12726, - "dusty": 12727, - "sonic": 12728, - "donovan": 12729, - "outskirts": 12730, - "cu": 12731, - "pathway": 12732, - "##band": 12733, - "##gun": 12734, - "##lines": 12735, - "disciplines": 12736, - "acids": 12737, - "cadet": 12738, - "paired": 12739, - "##40": 12740, - "sketches": 12741, - "##sive": 12742, - "marriages": 12743, - "##⁺": 12744, - "folding": 12745, - "peers": 12746, - "slovak": 12747, - "implies": 12748, - "admired": 12749, - "##beck": 12750, - "1880s": 12751, - "leopold": 12752, - "instinct": 12753, - "attained": 12754, - "weston": 12755, - "megan": 12756, - "horace": 12757, - "##ination": 12758, - "dorsal": 12759, - 
"ingredients": 12760, - "evolutionary": 12761, - "##its": 12762, - "complications": 12763, - "deity": 12764, - "lethal": 12765, - "brushing": 12766, - "levy": 12767, - "deserted": 12768, - "institutes": 12769, - "posthumously": 12770, - "delivering": 12771, - "telescope": 12772, - "coronation": 12773, - "motivated": 12774, - "rapids": 12775, - "luc": 12776, - "flicked": 12777, - "pays": 12778, - "volcano": 12779, - "tanner": 12780, - "weighed": 12781, - "##nica": 12782, - "crowds": 12783, - "frankie": 12784, - "gifted": 12785, - "addressing": 12786, - "granddaughter": 12787, - "winding": 12788, - "##rna": 12789, - "constantine": 12790, - "gomez": 12791, - "##front": 12792, - "landscapes": 12793, - "rudolf": 12794, - "anthropology": 12795, - "slate": 12796, - "werewolf": 12797, - "##lio": 12798, - "astronomy": 12799, - "circa": 12800, - "rouge": 12801, - "dreaming": 12802, - "sack": 12803, - "knelt": 12804, - "drowned": 12805, - "naomi": 12806, - "prolific": 12807, - "tracked": 12808, - "freezing": 12809, - "herb": 12810, - "##dium": 12811, - "agony": 12812, - "randall": 12813, - "twisting": 12814, - "wendy": 12815, - "deposit": 12816, - "touches": 12817, - "vein": 12818, - "wheeler": 12819, - "##bbled": 12820, - "##bor": 12821, - "batted": 12822, - "retaining": 12823, - "tire": 12824, - "presently": 12825, - "compare": 12826, - "specification": 12827, - "daemon": 12828, - "nigel": 12829, - "##grave": 12830, - "merry": 12831, - "recommendation": 12832, - "czechoslovakia": 12833, - "sandra": 12834, - "ng": 12835, - "roma": 12836, - "##sts": 12837, - "lambert": 12838, - "inheritance": 12839, - "sheikh": 12840, - "winchester": 12841, - "cries": 12842, - "examining": 12843, - "##yle": 12844, - "comeback": 12845, - "cuisine": 12846, - "nave": 12847, - "##iv": 12848, - "ko": 12849, - "retrieve": 12850, - "tomatoes": 12851, - "barker": 12852, - "polished": 12853, - "defining": 12854, - "irene": 12855, - "lantern": 12856, - "personalities": 12857, - "begging": 12858, - "tract": 12859, - "swore": 12860, - "1809": 12861, - "175": 12862, - "##gic": 12863, - "omaha": 12864, - "brotherhood": 12865, - "##rley": 12866, - "haiti": 12867, - "##ots": 12868, - "exeter": 12869, - "##ete": 12870, - "##zia": 12871, - "steele": 12872, - "dumb": 12873, - "pearson": 12874, - "210": 12875, - "surveyed": 12876, - "elisabeth": 12877, - "trends": 12878, - "##ef": 12879, - "fritz": 12880, - "##rf": 12881, - "premium": 12882, - "bugs": 12883, - "fraction": 12884, - "calmly": 12885, - "viking": 12886, - "##birds": 12887, - "tug": 12888, - "inserted": 12889, - "unusually": 12890, - "##ield": 12891, - "confronted": 12892, - "distress": 12893, - "crashing": 12894, - "brent": 12895, - "turks": 12896, - "resign": 12897, - "##olo": 12898, - "cambodia": 12899, - "gabe": 12900, - "sauce": 12901, - "##kal": 12902, - "evelyn": 12903, - "116": 12904, - "extant": 12905, - "clusters": 12906, - "quarry": 12907, - "teenagers": 12908, - "luna": 12909, - "##lers": 12910, - "##ister": 12911, - "affiliation": 12912, - "drill": 12913, - "##ashi": 12914, - "panthers": 12915, - "scenic": 12916, - "libya": 12917, - "anita": 12918, - "strengthen": 12919, - "inscriptions": 12920, - "##cated": 12921, - "lace": 12922, - "sued": 12923, - "judith": 12924, - "riots": 12925, - "##uted": 12926, - "mint": 12927, - "##eta": 12928, - "preparations": 12929, - "midst": 12930, - "dub": 12931, - "challenger": 12932, - "##vich": 12933, - "mock": 12934, - "cf": 12935, - "displaced": 12936, - "wicket": 12937, - "breaths": 12938, - "enables": 12939, - "schmidt": 
12940, - "analyst": 12941, - "##lum": 12942, - "ag": 12943, - "highlight": 12944, - "automotive": 12945, - "axe": 12946, - "josef": 12947, - "newark": 12948, - "sufficiently": 12949, - "resembles": 12950, - "50th": 12951, - "##pal": 12952, - "flushed": 12953, - "mum": 12954, - "traits": 12955, - "##ante": 12956, - "commodore": 12957, - "incomplete": 12958, - "warming": 12959, - "titular": 12960, - "ceremonial": 12961, - "ethical": 12962, - "118": 12963, - "celebrating": 12964, - "eighteenth": 12965, - "cao": 12966, - "lima": 12967, - "medalist": 12968, - "mobility": 12969, - "strips": 12970, - "snakes": 12971, - "##city": 12972, - "miniature": 12973, - "zagreb": 12974, - "barton": 12975, - "escapes": 12976, - "umbrella": 12977, - "automated": 12978, - "doubted": 12979, - "differs": 12980, - "cooled": 12981, - "georgetown": 12982, - "dresden": 12983, - "cooked": 12984, - "fade": 12985, - "wyatt": 12986, - "rna": 12987, - "jacobs": 12988, - "carlton": 12989, - "abundant": 12990, - "stereo": 12991, - "boost": 12992, - "madras": 12993, - "inning": 12994, - "##hia": 12995, - "spur": 12996, - "ip": 12997, - "malayalam": 12998, - "begged": 12999, - "osaka": 13000, - "groan": 13001, - "escaping": 13002, - "charging": 13003, - "dose": 13004, - "vista": 13005, - "##aj": 13006, - "bud": 13007, - "papa": 13008, - "communists": 13009, - "advocates": 13010, - "edged": 13011, - "tri": 13012, - "##cent": 13013, - "resemble": 13014, - "peaking": 13015, - "necklace": 13016, - "fried": 13017, - "montenegro": 13018, - "saxony": 13019, - "goose": 13020, - "glances": 13021, - "stuttgart": 13022, - "curator": 13023, - "recruit": 13024, - "grocery": 13025, - "sympathetic": 13026, - "##tting": 13027, - "##fort": 13028, - "127": 13029, - "lotus": 13030, - "randolph": 13031, - "ancestor": 13032, - "##rand": 13033, - "succeeding": 13034, - "jupiter": 13035, - "1798": 13036, - "macedonian": 13037, - "##heads": 13038, - "hiking": 13039, - "1808": 13040, - "handing": 13041, - "fischer": 13042, - "##itive": 13043, - "garbage": 13044, - "node": 13045, - "##pies": 13046, - "prone": 13047, - "singular": 13048, - "papua": 13049, - "inclined": 13050, - "attractions": 13051, - "italia": 13052, - "pouring": 13053, - "motioned": 13054, - "grandma": 13055, - "garnered": 13056, - "jacksonville": 13057, - "corp": 13058, - "ego": 13059, - "ringing": 13060, - "aluminum": 13061, - "##hausen": 13062, - "ordering": 13063, - "##foot": 13064, - "drawer": 13065, - "traders": 13066, - "synagogue": 13067, - "##play": 13068, - "##kawa": 13069, - "resistant": 13070, - "wandering": 13071, - "fragile": 13072, - "fiona": 13073, - "teased": 13074, - "var": 13075, - "hardcore": 13076, - "soaked": 13077, - "jubilee": 13078, - "decisive": 13079, - "exposition": 13080, - "mercer": 13081, - "poster": 13082, - "valencia": 13083, - "hale": 13084, - "kuwait": 13085, - "1811": 13086, - "##ises": 13087, - "##wr": 13088, - "##eed": 13089, - "tavern": 13090, - "gamma": 13091, - "122": 13092, - "johan": 13093, - "##uer": 13094, - "airways": 13095, - "amino": 13096, - "gil": 13097, - "##ury": 13098, - "vocational": 13099, - "domains": 13100, - "torres": 13101, - "##sp": 13102, - "generator": 13103, - "folklore": 13104, - "outcomes": 13105, - "##keeper": 13106, - "canberra": 13107, - "shooter": 13108, - "fl": 13109, - "beams": 13110, - "confrontation": 13111, - "##lling": 13112, - "##gram": 13113, - "feb": 13114, - "aligned": 13115, - "forestry": 13116, - "pipeline": 13117, - "jax": 13118, - "motorway": 13119, - "conception": 13120, - "decay": 13121, - "##tos": 
13122, - "coffin": 13123, - "##cott": 13124, - "stalin": 13125, - "1805": 13126, - "escorted": 13127, - "minded": 13128, - "##nam": 13129, - "sitcom": 13130, - "purchasing": 13131, - "twilight": 13132, - "veronica": 13133, - "additions": 13134, - "passive": 13135, - "tensions": 13136, - "straw": 13137, - "123": 13138, - "frequencies": 13139, - "1804": 13140, - "refugee": 13141, - "cultivation": 13142, - "##iate": 13143, - "christie": 13144, - "clary": 13145, - "bulletin": 13146, - "crept": 13147, - "disposal": 13148, - "##rich": 13149, - "##zong": 13150, - "processor": 13151, - "crescent": 13152, - "##rol": 13153, - "bmw": 13154, - "emphasized": 13155, - "whale": 13156, - "nazis": 13157, - "aurora": 13158, - "##eng": 13159, - "dwelling": 13160, - "hauled": 13161, - "sponsors": 13162, - "toledo": 13163, - "mega": 13164, - "ideology": 13165, - "theatres": 13166, - "tessa": 13167, - "cerambycidae": 13168, - "saves": 13169, - "turtle": 13170, - "cone": 13171, - "suspects": 13172, - "kara": 13173, - "rusty": 13174, - "yelling": 13175, - "greeks": 13176, - "mozart": 13177, - "shades": 13178, - "cocked": 13179, - "participant": 13180, - "##tro": 13181, - "shire": 13182, - "spit": 13183, - "freeze": 13184, - "necessity": 13185, - "##cos": 13186, - "inmates": 13187, - "nielsen": 13188, - "councillors": 13189, - "loaned": 13190, - "uncommon": 13191, - "omar": 13192, - "peasants": 13193, - "botanical": 13194, - "offspring": 13195, - "daniels": 13196, - "formations": 13197, - "jokes": 13198, - "1794": 13199, - "pioneers": 13200, - "sigma": 13201, - "licensing": 13202, - "##sus": 13203, - "wheelchair": 13204, - "polite": 13205, - "1807": 13206, - "liquor": 13207, - "pratt": 13208, - "trustee": 13209, - "##uta": 13210, - "forewings": 13211, - "balloon": 13212, - "##zz": 13213, - "kilometre": 13214, - "camping": 13215, - "explicit": 13216, - "casually": 13217, - "shawn": 13218, - "foolish": 13219, - "teammates": 13220, - "nm": 13221, - "hassan": 13222, - "carrie": 13223, - "judged": 13224, - "satisfy": 13225, - "vanessa": 13226, - "knives": 13227, - "selective": 13228, - "cnn": 13229, - "flowed": 13230, - "##lice": 13231, - "eclipse": 13232, - "stressed": 13233, - "eliza": 13234, - "mathematician": 13235, - "cease": 13236, - "cultivated": 13237, - "##roy": 13238, - "commissions": 13239, - "browns": 13240, - "##ania": 13241, - "destroyers": 13242, - "sheridan": 13243, - "meadow": 13244, - "##rius": 13245, - "minerals": 13246, - "##cial": 13247, - "downstream": 13248, - "clash": 13249, - "gram": 13250, - "memoirs": 13251, - "ventures": 13252, - "baha": 13253, - "seymour": 13254, - "archie": 13255, - "midlands": 13256, - "edith": 13257, - "fare": 13258, - "flynn": 13259, - "invite": 13260, - "canceled": 13261, - "tiles": 13262, - "stabbed": 13263, - "boulder": 13264, - "incorporate": 13265, - "amended": 13266, - "camden": 13267, - "facial": 13268, - "mollusk": 13269, - "unreleased": 13270, - "descriptions": 13271, - "yoga": 13272, - "grabs": 13273, - "550": 13274, - "raises": 13275, - "ramp": 13276, - "shiver": 13277, - "##rose": 13278, - "coined": 13279, - "pioneering": 13280, - "tunes": 13281, - "qing": 13282, - "warwick": 13283, - "tops": 13284, - "119": 13285, - "melanie": 13286, - "giles": 13287, - "##rous": 13288, - "wandered": 13289, - "##inal": 13290, - "annexed": 13291, - "nov": 13292, - "30th": 13293, - "unnamed": 13294, - "##ished": 13295, - "organizational": 13296, - "airplane": 13297, - "normandy": 13298, - "stoke": 13299, - "whistle": 13300, - "blessing": 13301, - "violations": 13302, - 
"chased": 13303, - "holders": 13304, - "shotgun": 13305, - "##ctic": 13306, - "outlet": 13307, - "reactor": 13308, - "##vik": 13309, - "tires": 13310, - "tearing": 13311, - "shores": 13312, - "fortified": 13313, - "mascot": 13314, - "constituencies": 13315, - "nc": 13316, - "columnist": 13317, - "productive": 13318, - "tibet": 13319, - "##rta": 13320, - "lineage": 13321, - "hooked": 13322, - "oct": 13323, - "tapes": 13324, - "judging": 13325, - "cody": 13326, - "##gger": 13327, - "hansen": 13328, - "kashmir": 13329, - "triggered": 13330, - "##eva": 13331, - "solved": 13332, - "cliffs": 13333, - "##tree": 13334, - "resisted": 13335, - "anatomy": 13336, - "protesters": 13337, - "transparent": 13338, - "implied": 13339, - "##iga": 13340, - "injection": 13341, - "mattress": 13342, - "excluding": 13343, - "##mbo": 13344, - "defenses": 13345, - "helpless": 13346, - "devotion": 13347, - "##elli": 13348, - "growl": 13349, - "liberals": 13350, - "weber": 13351, - "phenomena": 13352, - "atoms": 13353, - "plug": 13354, - "##iff": 13355, - "mortality": 13356, - "apprentice": 13357, - "howe": 13358, - "convincing": 13359, - "aaa": 13360, - "swimmer": 13361, - "barber": 13362, - "leone": 13363, - "promptly": 13364, - "sodium": 13365, - "def": 13366, - "nowadays": 13367, - "arise": 13368, - "##oning": 13369, - "gloucester": 13370, - "corrected": 13371, - "dignity": 13372, - "norm": 13373, - "erie": 13374, - "##ders": 13375, - "elders": 13376, - "evacuated": 13377, - "sylvia": 13378, - "compression": 13379, - "##yar": 13380, - "hartford": 13381, - "pose": 13382, - "backpack": 13383, - "reasoning": 13384, - "accepts": 13385, - "24th": 13386, - "wipe": 13387, - "millimetres": 13388, - "marcel": 13389, - "##oda": 13390, - "dodgers": 13391, - "albion": 13392, - "1790": 13393, - "overwhelmed": 13394, - "aerospace": 13395, - "oaks": 13396, - "1795": 13397, - "showcase": 13398, - "acknowledge": 13399, - "recovering": 13400, - "nolan": 13401, - "ashe": 13402, - "hurts": 13403, - "geology": 13404, - "fashioned": 13405, - "disappearance": 13406, - "farewell": 13407, - "swollen": 13408, - "shrug": 13409, - "marquis": 13410, - "wimbledon": 13411, - "124": 13412, - "rue": 13413, - "1792": 13414, - "commemorate": 13415, - "reduces": 13416, - "experiencing": 13417, - "inevitable": 13418, - "calcutta": 13419, - "intel": 13420, - "##court": 13421, - "murderer": 13422, - "sticking": 13423, - "fisheries": 13424, - "imagery": 13425, - "bloom": 13426, - "280": 13427, - "brake": 13428, - "##inus": 13429, - "gustav": 13430, - "hesitation": 13431, - "memorable": 13432, - "po": 13433, - "viral": 13434, - "beans": 13435, - "accidents": 13436, - "tunisia": 13437, - "antenna": 13438, - "spilled": 13439, - "consort": 13440, - "treatments": 13441, - "aye": 13442, - "perimeter": 13443, - "##gard": 13444, - "donation": 13445, - "hostage": 13446, - "migrated": 13447, - "banker": 13448, - "addiction": 13449, - "apex": 13450, - "lil": 13451, - "trout": 13452, - "##ously": 13453, - "conscience": 13454, - "##nova": 13455, - "rams": 13456, - "sands": 13457, - "genome": 13458, - "passionate": 13459, - "troubles": 13460, - "##lets": 13461, - "##set": 13462, - "amid": 13463, - "##ibility": 13464, - "##ret": 13465, - "higgins": 13466, - "exceed": 13467, - "vikings": 13468, - "##vie": 13469, - "payne": 13470, - "##zan": 13471, - "muscular": 13472, - "##ste": 13473, - "defendant": 13474, - "sucking": 13475, - "##wal": 13476, - "ibrahim": 13477, - "fuselage": 13478, - "claudia": 13479, - "vfl": 13480, - "europeans": 13481, - "snails": 13482, - 
"interval": 13483, - "##garh": 13484, - "preparatory": 13485, - "statewide": 13486, - "tasked": 13487, - "lacrosse": 13488, - "viktor": 13489, - "##lation": 13490, - "angola": 13491, - "##hra": 13492, - "flint": 13493, - "implications": 13494, - "employs": 13495, - "teens": 13496, - "patrons": 13497, - "stall": 13498, - "weekends": 13499, - "barriers": 13500, - "scrambled": 13501, - "nucleus": 13502, - "tehran": 13503, - "jenna": 13504, - "parsons": 13505, - "lifelong": 13506, - "robots": 13507, - "displacement": 13508, - "5000": 13509, - "##bles": 13510, - "precipitation": 13511, - "##gt": 13512, - "knuckles": 13513, - "clutched": 13514, - "1802": 13515, - "marrying": 13516, - "ecology": 13517, - "marx": 13518, - "accusations": 13519, - "declare": 13520, - "scars": 13521, - "kolkata": 13522, - "mat": 13523, - "meadows": 13524, - "bermuda": 13525, - "skeleton": 13526, - "finalists": 13527, - "vintage": 13528, - "crawl": 13529, - "coordinate": 13530, - "affects": 13531, - "subjected": 13532, - "orchestral": 13533, - "mistaken": 13534, - "##tc": 13535, - "mirrors": 13536, - "dipped": 13537, - "relied": 13538, - "260": 13539, - "arches": 13540, - "candle": 13541, - "##nick": 13542, - "incorporating": 13543, - "wildly": 13544, - "fond": 13545, - "basilica": 13546, - "owl": 13547, - "fringe": 13548, - "rituals": 13549, - "whispering": 13550, - "stirred": 13551, - "feud": 13552, - "tertiary": 13553, - "slick": 13554, - "goat": 13555, - "honorable": 13556, - "whereby": 13557, - "skip": 13558, - "ricardo": 13559, - "stripes": 13560, - "parachute": 13561, - "adjoining": 13562, - "submerged": 13563, - "synthesizer": 13564, - "##gren": 13565, - "intend": 13566, - "positively": 13567, - "ninety": 13568, - "phi": 13569, - "beaver": 13570, - "partition": 13571, - "fellows": 13572, - "alexis": 13573, - "prohibition": 13574, - "carlisle": 13575, - "bizarre": 13576, - "fraternity": 13577, - "##bre": 13578, - "doubts": 13579, - "icy": 13580, - "cbc": 13581, - "aquatic": 13582, - "sneak": 13583, - "sonny": 13584, - "combines": 13585, - "airports": 13586, - "crude": 13587, - "supervised": 13588, - "spatial": 13589, - "merge": 13590, - "alfonso": 13591, - "##bic": 13592, - "corrupt": 13593, - "scan": 13594, - "undergo": 13595, - "##ams": 13596, - "disabilities": 13597, - "colombian": 13598, - "comparing": 13599, - "dolphins": 13600, - "perkins": 13601, - "##lish": 13602, - "reprinted": 13603, - "unanimous": 13604, - "bounced": 13605, - "hairs": 13606, - "underworld": 13607, - "midwest": 13608, - "semester": 13609, - "bucket": 13610, - "paperback": 13611, - "miniseries": 13612, - "coventry": 13613, - "demise": 13614, - "##leigh": 13615, - "demonstrations": 13616, - "sensor": 13617, - "rotating": 13618, - "yan": 13619, - "##hler": 13620, - "arrange": 13621, - "soils": 13622, - "##idge": 13623, - "hyderabad": 13624, - "labs": 13625, - "##dr": 13626, - "brakes": 13627, - "grandchildren": 13628, - "##nde": 13629, - "negotiated": 13630, - "rover": 13631, - "ferrari": 13632, - "continuation": 13633, - "directorate": 13634, - "augusta": 13635, - "stevenson": 13636, - "counterpart": 13637, - "gore": 13638, - "##rda": 13639, - "nursery": 13640, - "rican": 13641, - "ave": 13642, - "collectively": 13643, - "broadly": 13644, - "pastoral": 13645, - "repertoire": 13646, - "asserted": 13647, - "discovering": 13648, - "nordic": 13649, - "styled": 13650, - "fiba": 13651, - "cunningham": 13652, - "harley": 13653, - "middlesex": 13654, - "survives": 13655, - "tumor": 13656, - "tempo": 13657, - "zack": 13658, - "aiming": 13659, - 
"lok": 13660, - "urgent": 13661, - "##rade": 13662, - "##nto": 13663, - "devils": 13664, - "##ement": 13665, - "contractor": 13666, - "turin": 13667, - "##wl": 13668, - "##ool": 13669, - "bliss": 13670, - "repaired": 13671, - "simmons": 13672, - "moan": 13673, - "astronomical": 13674, - "cr": 13675, - "negotiate": 13676, - "lyric": 13677, - "1890s": 13678, - "lara": 13679, - "bred": 13680, - "clad": 13681, - "angus": 13682, - "pbs": 13683, - "##ience": 13684, - "engineered": 13685, - "posed": 13686, - "##lk": 13687, - "hernandez": 13688, - "possessions": 13689, - "elbows": 13690, - "psychiatric": 13691, - "strokes": 13692, - "confluence": 13693, - "electorate": 13694, - "lifts": 13695, - "campuses": 13696, - "lava": 13697, - "alps": 13698, - "##ep": 13699, - "##ution": 13700, - "##date": 13701, - "physicist": 13702, - "woody": 13703, - "##page": 13704, - "##ographic": 13705, - "##itis": 13706, - "juliet": 13707, - "reformation": 13708, - "sparhawk": 13709, - "320": 13710, - "complement": 13711, - "suppressed": 13712, - "jewel": 13713, - "##½": 13714, - "floated": 13715, - "##kas": 13716, - "continuity": 13717, - "sadly": 13718, - "##ische": 13719, - "inability": 13720, - "melting": 13721, - "scanning": 13722, - "paula": 13723, - "flour": 13724, - "judaism": 13725, - "safer": 13726, - "vague": 13727, - "##lm": 13728, - "solving": 13729, - "curb": 13730, - "##stown": 13731, - "financially": 13732, - "gable": 13733, - "bees": 13734, - "expired": 13735, - "miserable": 13736, - "cassidy": 13737, - "dominion": 13738, - "1789": 13739, - "cupped": 13740, - "145": 13741, - "robbery": 13742, - "facto": 13743, - "amos": 13744, - "warden": 13745, - "resume": 13746, - "tallest": 13747, - "marvin": 13748, - "ing": 13749, - "pounded": 13750, - "usd": 13751, - "declaring": 13752, - "gasoline": 13753, - "##aux": 13754, - "darkened": 13755, - "270": 13756, - "650": 13757, - "sophomore": 13758, - "##mere": 13759, - "erection": 13760, - "gossip": 13761, - "televised": 13762, - "risen": 13763, - "dial": 13764, - "##eu": 13765, - "pillars": 13766, - "##link": 13767, - "passages": 13768, - "profound": 13769, - "##tina": 13770, - "arabian": 13771, - "ashton": 13772, - "silicon": 13773, - "nail": 13774, - "##ead": 13775, - "##lated": 13776, - "##wer": 13777, - "##hardt": 13778, - "fleming": 13779, - "firearms": 13780, - "ducked": 13781, - "circuits": 13782, - "blows": 13783, - "waterloo": 13784, - "titans": 13785, - "##lina": 13786, - "atom": 13787, - "fireplace": 13788, - "cheshire": 13789, - "financed": 13790, - "activation": 13791, - "algorithms": 13792, - "##zzi": 13793, - "constituent": 13794, - "catcher": 13795, - "cherokee": 13796, - "partnerships": 13797, - "sexuality": 13798, - "platoon": 13799, - "tragic": 13800, - "vivian": 13801, - "guarded": 13802, - "whiskey": 13803, - "meditation": 13804, - "poetic": 13805, - "##late": 13806, - "##nga": 13807, - "##ake": 13808, - "porto": 13809, - "listeners": 13810, - "dominance": 13811, - "kendra": 13812, - "mona": 13813, - "chandler": 13814, - "factions": 13815, - "22nd": 13816, - "salisbury": 13817, - "attitudes": 13818, - "derivative": 13819, - "##ido": 13820, - "##haus": 13821, - "intake": 13822, - "paced": 13823, - "javier": 13824, - "illustrator": 13825, - "barrels": 13826, - "bias": 13827, - "cockpit": 13828, - "burnett": 13829, - "dreamed": 13830, - "ensuing": 13831, - "##anda": 13832, - "receptors": 13833, - "someday": 13834, - "hawkins": 13835, - "mattered": 13836, - "##lal": 13837, - "slavic": 13838, - "1799": 13839, - "jesuit": 13840, - "cameroon": 
13841, - "wasted": 13842, - "tai": 13843, - "wax": 13844, - "lowering": 13845, - "victorious": 13846, - "freaking": 13847, - "outright": 13848, - "hancock": 13849, - "librarian": 13850, - "sensing": 13851, - "bald": 13852, - "calcium": 13853, - "myers": 13854, - "tablet": 13855, - "announcing": 13856, - "barack": 13857, - "shipyard": 13858, - "pharmaceutical": 13859, - "##uan": 13860, - "greenwich": 13861, - "flush": 13862, - "medley": 13863, - "patches": 13864, - "wolfgang": 13865, - "pt": 13866, - "speeches": 13867, - "acquiring": 13868, - "exams": 13869, - "nikolai": 13870, - "##gg": 13871, - "hayden": 13872, - "kannada": 13873, - "##type": 13874, - "reilly": 13875, - "##pt": 13876, - "waitress": 13877, - "abdomen": 13878, - "devastated": 13879, - "capped": 13880, - "pseudonym": 13881, - "pharmacy": 13882, - "fulfill": 13883, - "paraguay": 13884, - "1796": 13885, - "clicked": 13886, - "##trom": 13887, - "archipelago": 13888, - "syndicated": 13889, - "##hman": 13890, - "lumber": 13891, - "orgasm": 13892, - "rejection": 13893, - "clifford": 13894, - "lorraine": 13895, - "advent": 13896, - "mafia": 13897, - "rodney": 13898, - "brock": 13899, - "##ght": 13900, - "##used": 13901, - "##elia": 13902, - "cassette": 13903, - "chamberlain": 13904, - "despair": 13905, - "mongolia": 13906, - "sensors": 13907, - "developmental": 13908, - "upstream": 13909, - "##eg": 13910, - "##alis": 13911, - "spanning": 13912, - "165": 13913, - "trombone": 13914, - "basque": 13915, - "seeded": 13916, - "interred": 13917, - "renewable": 13918, - "rhys": 13919, - "leapt": 13920, - "revision": 13921, - "molecule": 13922, - "##ages": 13923, - "chord": 13924, - "vicious": 13925, - "nord": 13926, - "shivered": 13927, - "23rd": 13928, - "arlington": 13929, - "debts": 13930, - "corpus": 13931, - "sunrise": 13932, - "bays": 13933, - "blackburn": 13934, - "centimetres": 13935, - "##uded": 13936, - "shuddered": 13937, - "gm": 13938, - "strangely": 13939, - "gripping": 13940, - "cartoons": 13941, - "isabelle": 13942, - "orbital": 13943, - "##ppa": 13944, - "seals": 13945, - "proving": 13946, - "##lton": 13947, - "refusal": 13948, - "strengthened": 13949, - "bust": 13950, - "assisting": 13951, - "baghdad": 13952, - "batsman": 13953, - "portrayal": 13954, - "mara": 13955, - "pushes": 13956, - "spears": 13957, - "og": 13958, - "##cock": 13959, - "reside": 13960, - "nathaniel": 13961, - "brennan": 13962, - "1776": 13963, - "confirmation": 13964, - "caucus": 13965, - "##worthy": 13966, - "markings": 13967, - "yemen": 13968, - "nobles": 13969, - "ku": 13970, - "lazy": 13971, - "viewer": 13972, - "catalan": 13973, - "encompasses": 13974, - "sawyer": 13975, - "##fall": 13976, - "sparked": 13977, - "substances": 13978, - "patents": 13979, - "braves": 13980, - "arranger": 13981, - "evacuation": 13982, - "sergio": 13983, - "persuade": 13984, - "dover": 13985, - "tolerance": 13986, - "penguin": 13987, - "cum": 13988, - "jockey": 13989, - "insufficient": 13990, - "townships": 13991, - "occupying": 13992, - "declining": 13993, - "plural": 13994, - "processed": 13995, - "projection": 13996, - "puppet": 13997, - "flanders": 13998, - "introduces": 13999, - "liability": 14000, - "##yon": 14001, - "gymnastics": 14002, - "antwerp": 14003, - "taipei": 14004, - "hobart": 14005, - "candles": 14006, - "jeep": 14007, - "wes": 14008, - "observers": 14009, - "126": 14010, - "chaplain": 14011, - "bundle": 14012, - "glorious": 14013, - "##hine": 14014, - "hazel": 14015, - "flung": 14016, - "sol": 14017, - "excavations": 14018, - "dumped": 14019, - 
"stares": 14020, - "sh": 14021, - "bangalore": 14022, - "triangular": 14023, - "icelandic": 14024, - "intervals": 14025, - "expressing": 14026, - "turbine": 14027, - "##vers": 14028, - "songwriting": 14029, - "crafts": 14030, - "##igo": 14031, - "jasmine": 14032, - "ditch": 14033, - "rite": 14034, - "##ways": 14035, - "entertaining": 14036, - "comply": 14037, - "sorrow": 14038, - "wrestlers": 14039, - "basel": 14040, - "emirates": 14041, - "marian": 14042, - "rivera": 14043, - "helpful": 14044, - "##some": 14045, - "caution": 14046, - "downward": 14047, - "networking": 14048, - "##atory": 14049, - "##tered": 14050, - "darted": 14051, - "genocide": 14052, - "emergence": 14053, - "replies": 14054, - "specializing": 14055, - "spokesman": 14056, - "convenient": 14057, - "unlocked": 14058, - "fading": 14059, - "augustine": 14060, - "concentrations": 14061, - "resemblance": 14062, - "elijah": 14063, - "investigator": 14064, - "andhra": 14065, - "##uda": 14066, - "promotes": 14067, - "bean": 14068, - "##rrell": 14069, - "fleeing": 14070, - "wan": 14071, - "simone": 14072, - "announcer": 14073, - "##ame": 14074, - "##bby": 14075, - "lydia": 14076, - "weaver": 14077, - "132": 14078, - "residency": 14079, - "modification": 14080, - "##fest": 14081, - "stretches": 14082, - "##ast": 14083, - "alternatively": 14084, - "nat": 14085, - "lowe": 14086, - "lacks": 14087, - "##ented": 14088, - "pam": 14089, - "tile": 14090, - "concealed": 14091, - "inferior": 14092, - "abdullah": 14093, - "residences": 14094, - "tissues": 14095, - "vengeance": 14096, - "##ided": 14097, - "moisture": 14098, - "peculiar": 14099, - "groove": 14100, - "zip": 14101, - "bologna": 14102, - "jennings": 14103, - "ninja": 14104, - "oversaw": 14105, - "zombies": 14106, - "pumping": 14107, - "batch": 14108, - "livingston": 14109, - "emerald": 14110, - "installations": 14111, - "1797": 14112, - "peel": 14113, - "nitrogen": 14114, - "rama": 14115, - "##fying": 14116, - "##star": 14117, - "schooling": 14118, - "strands": 14119, - "responding": 14120, - "werner": 14121, - "##ost": 14122, - "lime": 14123, - "casa": 14124, - "accurately": 14125, - "targeting": 14126, - "##rod": 14127, - "underway": 14128, - "##uru": 14129, - "hemisphere": 14130, - "lester": 14131, - "##yard": 14132, - "occupies": 14133, - "2d": 14134, - "griffith": 14135, - "angrily": 14136, - "reorganized": 14137, - "##owing": 14138, - "courtney": 14139, - "deposited": 14140, - "##dd": 14141, - "##30": 14142, - "estadio": 14143, - "##ifies": 14144, - "dunn": 14145, - "exiled": 14146, - "##ying": 14147, - "checks": 14148, - "##combe": 14149, - "##о": 14150, - "##fly": 14151, - "successes": 14152, - "unexpectedly": 14153, - "blu": 14154, - "assessed": 14155, - "##flower": 14156, - "##ه": 14157, - "observing": 14158, - "sacked": 14159, - "spiders": 14160, - "kn": 14161, - "##tail": 14162, - "mu": 14163, - "nodes": 14164, - "prosperity": 14165, - "audrey": 14166, - "divisional": 14167, - "155": 14168, - "broncos": 14169, - "tangled": 14170, - "adjust": 14171, - "feeds": 14172, - "erosion": 14173, - "paolo": 14174, - "surf": 14175, - "directory": 14176, - "snatched": 14177, - "humid": 14178, - "admiralty": 14179, - "screwed": 14180, - "gt": 14181, - "reddish": 14182, - "##nese": 14183, - "modules": 14184, - "trench": 14185, - "lamps": 14186, - "bind": 14187, - "leah": 14188, - "bucks": 14189, - "competes": 14190, - "##nz": 14191, - "##form": 14192, - "transcription": 14193, - "##uc": 14194, - "isles": 14195, - "violently": 14196, - "clutching": 14197, - "pga": 14198, - "cyclist": 
14199, - "inflation": 14200, - "flats": 14201, - "ragged": 14202, - "unnecessary": 14203, - "##hian": 14204, - "stubborn": 14205, - "coordinated": 14206, - "harriet": 14207, - "baba": 14208, - "disqualified": 14209, - "330": 14210, - "insect": 14211, - "wolfe": 14212, - "##fies": 14213, - "reinforcements": 14214, - "rocked": 14215, - "duel": 14216, - "winked": 14217, - "embraced": 14218, - "bricks": 14219, - "##raj": 14220, - "hiatus": 14221, - "defeats": 14222, - "pending": 14223, - "brightly": 14224, - "jealousy": 14225, - "##xton": 14226, - "##hm": 14227, - "##uki": 14228, - "lena": 14229, - "gdp": 14230, - "colorful": 14231, - "##dley": 14232, - "stein": 14233, - "kidney": 14234, - "##shu": 14235, - "underwear": 14236, - "wanderers": 14237, - "##haw": 14238, - "##icus": 14239, - "guardians": 14240, - "m³": 14241, - "roared": 14242, - "habits": 14243, - "##wise": 14244, - "permits": 14245, - "gp": 14246, - "uranium": 14247, - "punished": 14248, - "disguise": 14249, - "bundesliga": 14250, - "elise": 14251, - "dundee": 14252, - "erotic": 14253, - "partisan": 14254, - "pi": 14255, - "collectors": 14256, - "float": 14257, - "individually": 14258, - "rendering": 14259, - "behavioral": 14260, - "bucharest": 14261, - "ser": 14262, - "hare": 14263, - "valerie": 14264, - "corporal": 14265, - "nutrition": 14266, - "proportional": 14267, - "##isa": 14268, - "immense": 14269, - "##kis": 14270, - "pavement": 14271, - "##zie": 14272, - "##eld": 14273, - "sutherland": 14274, - "crouched": 14275, - "1775": 14276, - "##lp": 14277, - "suzuki": 14278, - "trades": 14279, - "endurance": 14280, - "operas": 14281, - "crosby": 14282, - "prayed": 14283, - "priory": 14284, - "rory": 14285, - "socially": 14286, - "##urn": 14287, - "gujarat": 14288, - "##pu": 14289, - "walton": 14290, - "cube": 14291, - "pasha": 14292, - "privilege": 14293, - "lennon": 14294, - "floods": 14295, - "thorne": 14296, - "waterfall": 14297, - "nipple": 14298, - "scouting": 14299, - "approve": 14300, - "##lov": 14301, - "minorities": 14302, - "voter": 14303, - "dwight": 14304, - "extensions": 14305, - "assure": 14306, - "ballroom": 14307, - "slap": 14308, - "dripping": 14309, - "privileges": 14310, - "rejoined": 14311, - "confessed": 14312, - "demonstrating": 14313, - "patriotic": 14314, - "yell": 14315, - "investor": 14316, - "##uth": 14317, - "pagan": 14318, - "slumped": 14319, - "squares": 14320, - "##cle": 14321, - "##kins": 14322, - "confront": 14323, - "bert": 14324, - "embarrassment": 14325, - "##aid": 14326, - "aston": 14327, - "urging": 14328, - "sweater": 14329, - "starr": 14330, - "yuri": 14331, - "brains": 14332, - "williamson": 14333, - "commuter": 14334, - "mortar": 14335, - "structured": 14336, - "selfish": 14337, - "exports": 14338, - "##jon": 14339, - "cds": 14340, - "##him": 14341, - "unfinished": 14342, - "##rre": 14343, - "mortgage": 14344, - "destinations": 14345, - "##nagar": 14346, - "canoe": 14347, - "solitary": 14348, - "buchanan": 14349, - "delays": 14350, - "magistrate": 14351, - "fk": 14352, - "##pling": 14353, - "motivation": 14354, - "##lier": 14355, - "##vier": 14356, - "recruiting": 14357, - "assess": 14358, - "##mouth": 14359, - "malik": 14360, - "antique": 14361, - "1791": 14362, - "pius": 14363, - "rahman": 14364, - "reich": 14365, - "tub": 14366, - "zhou": 14367, - "smashed": 14368, - "airs": 14369, - "galway": 14370, - "xii": 14371, - "conditioning": 14372, - "honduras": 14373, - "discharged": 14374, - "dexter": 14375, - "##pf": 14376, - "lionel": 14377, - "129": 14378, - "debates": 14379, - "lemon": 
14380, - "tiffany": 14381, - "volunteered": 14382, - "dom": 14383, - "dioxide": 14384, - "procession": 14385, - "devi": 14386, - "sic": 14387, - "tremendous": 14388, - "advertisements": 14389, - "colts": 14390, - "transferring": 14391, - "verdict": 14392, - "hanover": 14393, - "decommissioned": 14394, - "utter": 14395, - "relate": 14396, - "pac": 14397, - "racism": 14398, - "##top": 14399, - "beacon": 14400, - "limp": 14401, - "similarity": 14402, - "terra": 14403, - "occurrence": 14404, - "ant": 14405, - "##how": 14406, - "becky": 14407, - "capt": 14408, - "updates": 14409, - "armament": 14410, - "richie": 14411, - "pal": 14412, - "##graph": 14413, - "halloween": 14414, - "mayo": 14415, - "##ssen": 14416, - "##bone": 14417, - "cara": 14418, - "serena": 14419, - "fcc": 14420, - "dolls": 14421, - "obligations": 14422, - "##dling": 14423, - "violated": 14424, - "lafayette": 14425, - "jakarta": 14426, - "exploitation": 14427, - "##ime": 14428, - "infamous": 14429, - "iconic": 14430, - "##lah": 14431, - "##park": 14432, - "kitty": 14433, - "moody": 14434, - "reginald": 14435, - "dread": 14436, - "spill": 14437, - "crystals": 14438, - "olivier": 14439, - "modeled": 14440, - "bluff": 14441, - "equilibrium": 14442, - "separating": 14443, - "notices": 14444, - "ordnance": 14445, - "extinction": 14446, - "onset": 14447, - "cosmic": 14448, - "attachment": 14449, - "sammy": 14450, - "expose": 14451, - "privy": 14452, - "anchored": 14453, - "##bil": 14454, - "abbott": 14455, - "admits": 14456, - "bending": 14457, - "baritone": 14458, - "emmanuel": 14459, - "policeman": 14460, - "vaughan": 14461, - "winged": 14462, - "climax": 14463, - "dresses": 14464, - "denny": 14465, - "polytechnic": 14466, - "mohamed": 14467, - "burmese": 14468, - "authentic": 14469, - "nikki": 14470, - "genetics": 14471, - "grandparents": 14472, - "homestead": 14473, - "gaza": 14474, - "postponed": 14475, - "metacritic": 14476, - "una": 14477, - "##sby": 14478, - "##bat": 14479, - "unstable": 14480, - "dissertation": 14481, - "##rial": 14482, - "##cian": 14483, - "curls": 14484, - "obscure": 14485, - "uncovered": 14486, - "bronx": 14487, - "praying": 14488, - "disappearing": 14489, - "##hoe": 14490, - "prehistoric": 14491, - "coke": 14492, - "turret": 14493, - "mutations": 14494, - "nonprofit": 14495, - "pits": 14496, - "monaco": 14497, - "##ي": 14498, - "##usion": 14499, - "prominently": 14500, - "dispatched": 14501, - "podium": 14502, - "##mir": 14503, - "uci": 14504, - "##uation": 14505, - "133": 14506, - "fortifications": 14507, - "birthplace": 14508, - "kendall": 14509, - "##lby": 14510, - "##oll": 14511, - "preacher": 14512, - "rack": 14513, - "goodman": 14514, - "##rman": 14515, - "persistent": 14516, - "##ott": 14517, - "countless": 14518, - "jaime": 14519, - "recorder": 14520, - "lexington": 14521, - "persecution": 14522, - "jumps": 14523, - "renewal": 14524, - "wagons": 14525, - "##11": 14526, - "crushing": 14527, - "##holder": 14528, - "decorations": 14529, - "##lake": 14530, - "abundance": 14531, - "wrath": 14532, - "laundry": 14533, - "£1": 14534, - "garde": 14535, - "##rp": 14536, - "jeanne": 14537, - "beetles": 14538, - "peasant": 14539, - "##sl": 14540, - "splitting": 14541, - "caste": 14542, - "sergei": 14543, - "##rer": 14544, - "##ema": 14545, - "scripts": 14546, - "##ively": 14547, - "rub": 14548, - "satellites": 14549, - "##vor": 14550, - "inscribed": 14551, - "verlag": 14552, - "scrapped": 14553, - "gale": 14554, - "packages": 14555, - "chick": 14556, - "potato": 14557, - "slogan": 14558, - "kathleen": 
14559, - "arabs": 14560, - "##culture": 14561, - "counterparts": 14562, - "reminiscent": 14563, - "choral": 14564, - "##tead": 14565, - "rand": 14566, - "retains": 14567, - "bushes": 14568, - "dane": 14569, - "accomplish": 14570, - "courtesy": 14571, - "closes": 14572, - "##oth": 14573, - "slaughter": 14574, - "hague": 14575, - "krakow": 14576, - "lawson": 14577, - "tailed": 14578, - "elias": 14579, - "ginger": 14580, - "##ttes": 14581, - "canopy": 14582, - "betrayal": 14583, - "rebuilding": 14584, - "turf": 14585, - "##hof": 14586, - "frowning": 14587, - "allegiance": 14588, - "brigades": 14589, - "kicks": 14590, - "rebuild": 14591, - "polls": 14592, - "alias": 14593, - "nationalism": 14594, - "td": 14595, - "rowan": 14596, - "audition": 14597, - "bowie": 14598, - "fortunately": 14599, - "recognizes": 14600, - "harp": 14601, - "dillon": 14602, - "horrified": 14603, - "##oro": 14604, - "renault": 14605, - "##tics": 14606, - "ropes": 14607, - "##α": 14608, - "presumed": 14609, - "rewarded": 14610, - "infrared": 14611, - "wiping": 14612, - "accelerated": 14613, - "illustration": 14614, - "##rid": 14615, - "presses": 14616, - "practitioners": 14617, - "badminton": 14618, - "##iard": 14619, - "detained": 14620, - "##tera": 14621, - "recognizing": 14622, - "relates": 14623, - "misery": 14624, - "##sies": 14625, - "##tly": 14626, - "reproduction": 14627, - "piercing": 14628, - "potatoes": 14629, - "thornton": 14630, - "esther": 14631, - "manners": 14632, - "hbo": 14633, - "##aan": 14634, - "ours": 14635, - "bullshit": 14636, - "ernie": 14637, - "perennial": 14638, - "sensitivity": 14639, - "illuminated": 14640, - "rupert": 14641, - "##jin": 14642, - "##iss": 14643, - "##ear": 14644, - "rfc": 14645, - "nassau": 14646, - "##dock": 14647, - "staggered": 14648, - "socialism": 14649, - "##haven": 14650, - "appointments": 14651, - "nonsense": 14652, - "prestige": 14653, - "sharma": 14654, - "haul": 14655, - "##tical": 14656, - "solidarity": 14657, - "gps": 14658, - "##ook": 14659, - "##rata": 14660, - "igor": 14661, - "pedestrian": 14662, - "##uit": 14663, - "baxter": 14664, - "tenants": 14665, - "wires": 14666, - "medication": 14667, - "unlimited": 14668, - "guiding": 14669, - "impacts": 14670, - "diabetes": 14671, - "##rama": 14672, - "sasha": 14673, - "pas": 14674, - "clive": 14675, - "extraction": 14676, - "131": 14677, - "continually": 14678, - "constraints": 14679, - "##bilities": 14680, - "sonata": 14681, - "hunted": 14682, - "sixteenth": 14683, - "chu": 14684, - "planting": 14685, - "quote": 14686, - "mayer": 14687, - "pretended": 14688, - "abs": 14689, - "spat": 14690, - "##hua": 14691, - "ceramic": 14692, - "##cci": 14693, - "curtains": 14694, - "pigs": 14695, - "pitching": 14696, - "##dad": 14697, - "latvian": 14698, - "sore": 14699, - "dayton": 14700, - "##sted": 14701, - "##qi": 14702, - "patrols": 14703, - "slice": 14704, - "playground": 14705, - "##nted": 14706, - "shone": 14707, - "stool": 14708, - "apparatus": 14709, - "inadequate": 14710, - "mates": 14711, - "treason": 14712, - "##ija": 14713, - "desires": 14714, - "##liga": 14715, - "##croft": 14716, - "somalia": 14717, - "laurent": 14718, - "mir": 14719, - "leonardo": 14720, - "oracle": 14721, - "grape": 14722, - "obliged": 14723, - "chevrolet": 14724, - "thirteenth": 14725, - "stunning": 14726, - "enthusiastic": 14727, - "##ede": 14728, - "accounted": 14729, - "concludes": 14730, - "currents": 14731, - "basil": 14732, - "##kovic": 14733, - "drought": 14734, - "##rica": 14735, - "mai": 14736, - "##aire": 14737, - "shove": 14738, - 
"posting": 14739, - "##shed": 14740, - "pilgrimage": 14741, - "humorous": 14742, - "packing": 14743, - "fry": 14744, - "pencil": 14745, - "wines": 14746, - "smells": 14747, - "144": 14748, - "marilyn": 14749, - "aching": 14750, - "newest": 14751, - "clung": 14752, - "bon": 14753, - "neighbours": 14754, - "sanctioned": 14755, - "##pie": 14756, - "mug": 14757, - "##stock": 14758, - "drowning": 14759, - "##mma": 14760, - "hydraulic": 14761, - "##vil": 14762, - "hiring": 14763, - "reminder": 14764, - "lilly": 14765, - "investigators": 14766, - "##ncies": 14767, - "sour": 14768, - "##eous": 14769, - "compulsory": 14770, - "packet": 14771, - "##rion": 14772, - "##graphic": 14773, - "##elle": 14774, - "cannes": 14775, - "##inate": 14776, - "depressed": 14777, - "##rit": 14778, - "heroic": 14779, - "importantly": 14780, - "theresa": 14781, - "##tled": 14782, - "conway": 14783, - "saturn": 14784, - "marginal": 14785, - "rae": 14786, - "##xia": 14787, - "corresponds": 14788, - "royce": 14789, - "pact": 14790, - "jasper": 14791, - "explosives": 14792, - "packaging": 14793, - "aluminium": 14794, - "##ttered": 14795, - "denotes": 14796, - "rhythmic": 14797, - "spans": 14798, - "assignments": 14799, - "hereditary": 14800, - "outlined": 14801, - "originating": 14802, - "sundays": 14803, - "lad": 14804, - "reissued": 14805, - "greeting": 14806, - "beatrice": 14807, - "##dic": 14808, - "pillar": 14809, - "marcos": 14810, - "plots": 14811, - "handbook": 14812, - "alcoholic": 14813, - "judiciary": 14814, - "avant": 14815, - "slides": 14816, - "extract": 14817, - "masculine": 14818, - "blur": 14819, - "##eum": 14820, - "##force": 14821, - "homage": 14822, - "trembled": 14823, - "owens": 14824, - "hymn": 14825, - "trey": 14826, - "omega": 14827, - "signaling": 14828, - "socks": 14829, - "accumulated": 14830, - "reacted": 14831, - "attic": 14832, - "theo": 14833, - "lining": 14834, - "angie": 14835, - "distraction": 14836, - "primera": 14837, - "talbot": 14838, - "##key": 14839, - "1200": 14840, - "ti": 14841, - "creativity": 14842, - "billed": 14843, - "##hey": 14844, - "deacon": 14845, - "eduardo": 14846, - "identifies": 14847, - "proposition": 14848, - "dizzy": 14849, - "gunner": 14850, - "hogan": 14851, - "##yam": 14852, - "##pping": 14853, - "##hol": 14854, - "ja": 14855, - "##chan": 14856, - "jensen": 14857, - "reconstructed": 14858, - "##berger": 14859, - "clearance": 14860, - "darius": 14861, - "##nier": 14862, - "abe": 14863, - "harlem": 14864, - "plea": 14865, - "dei": 14866, - "circled": 14867, - "emotionally": 14868, - "notation": 14869, - "fascist": 14870, - "neville": 14871, - "exceeded": 14872, - "upwards": 14873, - "viable": 14874, - "ducks": 14875, - "##fo": 14876, - "workforce": 14877, - "racer": 14878, - "limiting": 14879, - "shri": 14880, - "##lson": 14881, - "possesses": 14882, - "1600": 14883, - "kerr": 14884, - "moths": 14885, - "devastating": 14886, - "laden": 14887, - "disturbing": 14888, - "locking": 14889, - "##cture": 14890, - "gal": 14891, - "fearing": 14892, - "accreditation": 14893, - "flavor": 14894, - "aide": 14895, - "1870s": 14896, - "mountainous": 14897, - "##baum": 14898, - "melt": 14899, - "##ures": 14900, - "motel": 14901, - "texture": 14902, - "servers": 14903, - "soda": 14904, - "##mb": 14905, - "herd": 14906, - "##nium": 14907, - "erect": 14908, - "puzzled": 14909, - "hum": 14910, - "peggy": 14911, - "examinations": 14912, - "gould": 14913, - "testified": 14914, - "geoff": 14915, - "ren": 14916, - "devised": 14917, - "sacks": 14918, - "##law": 14919, - "denial": 14920, 
- "posters": 14921, - "grunted": 14922, - "cesar": 14923, - "tutor": 14924, - "ec": 14925, - "gerry": 14926, - "offerings": 14927, - "byrne": 14928, - "falcons": 14929, - "combinations": 14930, - "ct": 14931, - "incoming": 14932, - "pardon": 14933, - "rocking": 14934, - "26th": 14935, - "avengers": 14936, - "flared": 14937, - "mankind": 14938, - "seller": 14939, - "uttar": 14940, - "loch": 14941, - "nadia": 14942, - "stroking": 14943, - "exposing": 14944, - "##hd": 14945, - "fertile": 14946, - "ancestral": 14947, - "instituted": 14948, - "##has": 14949, - "noises": 14950, - "prophecy": 14951, - "taxation": 14952, - "eminent": 14953, - "vivid": 14954, - "pol": 14955, - "##bol": 14956, - "dart": 14957, - "indirect": 14958, - "multimedia": 14959, - "notebook": 14960, - "upside": 14961, - "displaying": 14962, - "adrenaline": 14963, - "referenced": 14964, - "geometric": 14965, - "##iving": 14966, - "progression": 14967, - "##ddy": 14968, - "blunt": 14969, - "announce": 14970, - "##far": 14971, - "implementing": 14972, - "##lav": 14973, - "aggression": 14974, - "liaison": 14975, - "cooler": 14976, - "cares": 14977, - "headache": 14978, - "plantations": 14979, - "gorge": 14980, - "dots": 14981, - "impulse": 14982, - "thickness": 14983, - "ashamed": 14984, - "averaging": 14985, - "kathy": 14986, - "obligation": 14987, - "precursor": 14988, - "137": 14989, - "fowler": 14990, - "symmetry": 14991, - "thee": 14992, - "225": 14993, - "hears": 14994, - "##rai": 14995, - "undergoing": 14996, - "ads": 14997, - "butcher": 14998, - "bowler": 14999, - "##lip": 15000, - "cigarettes": 15001, - "subscription": 15002, - "goodness": 15003, - "##ically": 15004, - "browne": 15005, - "##hos": 15006, - "##tech": 15007, - "kyoto": 15008, - "donor": 15009, - "##erty": 15010, - "damaging": 15011, - "friction": 15012, - "drifting": 15013, - "expeditions": 15014, - "hardened": 15015, - "prostitution": 15016, - "152": 15017, - "fauna": 15018, - "blankets": 15019, - "claw": 15020, - "tossing": 15021, - "snarled": 15022, - "butterflies": 15023, - "recruits": 15024, - "investigative": 15025, - "coated": 15026, - "healed": 15027, - "138": 15028, - "communal": 15029, - "hai": 15030, - "xiii": 15031, - "academics": 15032, - "boone": 15033, - "psychologist": 15034, - "restless": 15035, - "lahore": 15036, - "stephens": 15037, - "mba": 15038, - "brendan": 15039, - "foreigners": 15040, - "printer": 15041, - "##pc": 15042, - "ached": 15043, - "explode": 15044, - "27th": 15045, - "deed": 15046, - "scratched": 15047, - "dared": 15048, - "##pole": 15049, - "cardiac": 15050, - "1780": 15051, - "okinawa": 15052, - "proto": 15053, - "commando": 15054, - "compelled": 15055, - "oddly": 15056, - "electrons": 15057, - "##base": 15058, - "replica": 15059, - "thanksgiving": 15060, - "##rist": 15061, - "sheila": 15062, - "deliberate": 15063, - "stafford": 15064, - "tidal": 15065, - "representations": 15066, - "hercules": 15067, - "ou": 15068, - "##path": 15069, - "##iated": 15070, - "kidnapping": 15071, - "lenses": 15072, - "##tling": 15073, - "deficit": 15074, - "samoa": 15075, - "mouths": 15076, - "consuming": 15077, - "computational": 15078, - "maze": 15079, - "granting": 15080, - "smirk": 15081, - "razor": 15082, - "fixture": 15083, - "ideals": 15084, - "inviting": 15085, - "aiden": 15086, - "nominal": 15087, - "##vs": 15088, - "issuing": 15089, - "julio": 15090, - "pitt": 15091, - "ramsey": 15092, - "docks": 15093, - "##oss": 15094, - "exhaust": 15095, - "##owed": 15096, - "bavarian": 15097, - "draped": 15098, - "anterior": 15099, - 
"mating": 15100, - "ethiopian": 15101, - "explores": 15102, - "noticing": 15103, - "##nton": 15104, - "discarded": 15105, - "convenience": 15106, - "hoffman": 15107, - "endowment": 15108, - "beasts": 15109, - "cartridge": 15110, - "mormon": 15111, - "paternal": 15112, - "probe": 15113, - "sleeves": 15114, - "interfere": 15115, - "lump": 15116, - "deadline": 15117, - "##rail": 15118, - "jenks": 15119, - "bulldogs": 15120, - "scrap": 15121, - "alternating": 15122, - "justified": 15123, - "reproductive": 15124, - "nam": 15125, - "seize": 15126, - "descending": 15127, - "secretariat": 15128, - "kirby": 15129, - "coupe": 15130, - "grouped": 15131, - "smash": 15132, - "panther": 15133, - "sedan": 15134, - "tapping": 15135, - "##18": 15136, - "lola": 15137, - "cheer": 15138, - "germanic": 15139, - "unfortunate": 15140, - "##eter": 15141, - "unrelated": 15142, - "##fan": 15143, - "subordinate": 15144, - "##sdale": 15145, - "suzanne": 15146, - "advertisement": 15147, - "##ility": 15148, - "horsepower": 15149, - "##lda": 15150, - "cautiously": 15151, - "discourse": 15152, - "luigi": 15153, - "##mans": 15154, - "##fields": 15155, - "noun": 15156, - "prevalent": 15157, - "mao": 15158, - "schneider": 15159, - "everett": 15160, - "surround": 15161, - "governorate": 15162, - "kira": 15163, - "##avia": 15164, - "westward": 15165, - "##take": 15166, - "misty": 15167, - "rails": 15168, - "sustainability": 15169, - "134": 15170, - "unused": 15171, - "##rating": 15172, - "packs": 15173, - "toast": 15174, - "unwilling": 15175, - "regulate": 15176, - "thy": 15177, - "suffrage": 15178, - "nile": 15179, - "awe": 15180, - "assam": 15181, - "definitions": 15182, - "travelers": 15183, - "affordable": 15184, - "##rb": 15185, - "conferred": 15186, - "sells": 15187, - "undefeated": 15188, - "beneficial": 15189, - "torso": 15190, - "basal": 15191, - "repeating": 15192, - "remixes": 15193, - "##pass": 15194, - "bahrain": 15195, - "cables": 15196, - "fang": 15197, - "##itated": 15198, - "excavated": 15199, - "numbering": 15200, - "statutory": 15201, - "##rey": 15202, - "deluxe": 15203, - "##lian": 15204, - "forested": 15205, - "ramirez": 15206, - "derbyshire": 15207, - "zeus": 15208, - "slamming": 15209, - "transfers": 15210, - "astronomer": 15211, - "banana": 15212, - "lottery": 15213, - "berg": 15214, - "histories": 15215, - "bamboo": 15216, - "##uchi": 15217, - "resurrection": 15218, - "posterior": 15219, - "bowls": 15220, - "vaguely": 15221, - "##thi": 15222, - "thou": 15223, - "preserving": 15224, - "tensed": 15225, - "offence": 15226, - "##inas": 15227, - "meyrick": 15228, - "callum": 15229, - "ridden": 15230, - "watt": 15231, - "langdon": 15232, - "tying": 15233, - "lowland": 15234, - "snorted": 15235, - "daring": 15236, - "truman": 15237, - "##hale": 15238, - "##girl": 15239, - "aura": 15240, - "overly": 15241, - "filing": 15242, - "weighing": 15243, - "goa": 15244, - "infections": 15245, - "philanthropist": 15246, - "saunders": 15247, - "eponymous": 15248, - "##owski": 15249, - "latitude": 15250, - "perspectives": 15251, - "reviewing": 15252, - "mets": 15253, - "commandant": 15254, - "radial": 15255, - "##kha": 15256, - "flashlight": 15257, - "reliability": 15258, - "koch": 15259, - "vowels": 15260, - "amazed": 15261, - "ada": 15262, - "elaine": 15263, - "supper": 15264, - "##rth": 15265, - "##encies": 15266, - "predator": 15267, - "debated": 15268, - "soviets": 15269, - "cola": 15270, - "##boards": 15271, - "##nah": 15272, - "compartment": 15273, - "crooked": 15274, - "arbitrary": 15275, - "fourteenth": 15276, - 
"##ctive": 15277, - "havana": 15278, - "majors": 15279, - "steelers": 15280, - "clips": 15281, - "profitable": 15282, - "ambush": 15283, - "exited": 15284, - "packers": 15285, - "##tile": 15286, - "nude": 15287, - "cracks": 15288, - "fungi": 15289, - "##е": 15290, - "limb": 15291, - "trousers": 15292, - "josie": 15293, - "shelby": 15294, - "tens": 15295, - "frederic": 15296, - "##ος": 15297, - "definite": 15298, - "smoothly": 15299, - "constellation": 15300, - "insult": 15301, - "baton": 15302, - "discs": 15303, - "lingering": 15304, - "##nco": 15305, - "conclusions": 15306, - "lent": 15307, - "staging": 15308, - "becker": 15309, - "grandpa": 15310, - "shaky": 15311, - "##tron": 15312, - "einstein": 15313, - "obstacles": 15314, - "sk": 15315, - "adverse": 15316, - "elle": 15317, - "economically": 15318, - "##moto": 15319, - "mccartney": 15320, - "thor": 15321, - "dismissal": 15322, - "motions": 15323, - "readings": 15324, - "nostrils": 15325, - "treatise": 15326, - "##pace": 15327, - "squeezing": 15328, - "evidently": 15329, - "prolonged": 15330, - "1783": 15331, - "venezuelan": 15332, - "je": 15333, - "marguerite": 15334, - "beirut": 15335, - "takeover": 15336, - "shareholders": 15337, - "##vent": 15338, - "denise": 15339, - "digit": 15340, - "airplay": 15341, - "norse": 15342, - "##bbling": 15343, - "imaginary": 15344, - "pills": 15345, - "hubert": 15346, - "blaze": 15347, - "vacated": 15348, - "eliminating": 15349, - "##ello": 15350, - "vine": 15351, - "mansfield": 15352, - "##tty": 15353, - "retrospective": 15354, - "barrow": 15355, - "borne": 15356, - "clutch": 15357, - "bail": 15358, - "forensic": 15359, - "weaving": 15360, - "##nett": 15361, - "##witz": 15362, - "desktop": 15363, - "citadel": 15364, - "promotions": 15365, - "worrying": 15366, - "dorset": 15367, - "ieee": 15368, - "subdivided": 15369, - "##iating": 15370, - "manned": 15371, - "expeditionary": 15372, - "pickup": 15373, - "synod": 15374, - "chuckle": 15375, - "185": 15376, - "barney": 15377, - "##rz": 15378, - "##ffin": 15379, - "functionality": 15380, - "karachi": 15381, - "litigation": 15382, - "meanings": 15383, - "uc": 15384, - "lick": 15385, - "turbo": 15386, - "anders": 15387, - "##ffed": 15388, - "execute": 15389, - "curl": 15390, - "oppose": 15391, - "ankles": 15392, - "typhoon": 15393, - "##د": 15394, - "##ache": 15395, - "##asia": 15396, - "linguistics": 15397, - "compassion": 15398, - "pressures": 15399, - "grazing": 15400, - "perfection": 15401, - "##iting": 15402, - "immunity": 15403, - "monopoly": 15404, - "muddy": 15405, - "backgrounds": 15406, - "136": 15407, - "namibia": 15408, - "francesca": 15409, - "monitors": 15410, - "attracting": 15411, - "stunt": 15412, - "tuition": 15413, - "##ии": 15414, - "vegetable": 15415, - "##mates": 15416, - "##quent": 15417, - "mgm": 15418, - "jen": 15419, - "complexes": 15420, - "forts": 15421, - "##ond": 15422, - "cellar": 15423, - "bites": 15424, - "seventeenth": 15425, - "royals": 15426, - "flemish": 15427, - "failures": 15428, - "mast": 15429, - "charities": 15430, - "##cular": 15431, - "peruvian": 15432, - "capitals": 15433, - "macmillan": 15434, - "ipswich": 15435, - "outward": 15436, - "frigate": 15437, - "postgraduate": 15438, - "folds": 15439, - "employing": 15440, - "##ouse": 15441, - "concurrently": 15442, - "fiery": 15443, - "##tai": 15444, - "contingent": 15445, - "nightmares": 15446, - "monumental": 15447, - "nicaragua": 15448, - "##kowski": 15449, - "lizard": 15450, - "mal": 15451, - "fielding": 15452, - "gig": 15453, - "reject": 15454, - "##pad": 15455, 
- "harding": 15456, - "##ipe": 15457, - "coastline": 15458, - "##cin": 15459, - "##nos": 15460, - "beethoven": 15461, - "humphrey": 15462, - "innovations": 15463, - "##tam": 15464, - "##nge": 15465, - "norris": 15466, - "doris": 15467, - "solicitor": 15468, - "huang": 15469, - "obey": 15470, - "141": 15471, - "##lc": 15472, - "niagara": 15473, - "##tton": 15474, - "shelves": 15475, - "aug": 15476, - "bourbon": 15477, - "curry": 15478, - "nightclub": 15479, - "specifications": 15480, - "hilton": 15481, - "##ndo": 15482, - "centennial": 15483, - "dispersed": 15484, - "worm": 15485, - "neglected": 15486, - "briggs": 15487, - "sm": 15488, - "font": 15489, - "kuala": 15490, - "uneasy": 15491, - "plc": 15492, - "##nstein": 15493, - "##bound": 15494, - "##aking": 15495, - "##burgh": 15496, - "awaiting": 15497, - "pronunciation": 15498, - "##bbed": 15499, - "##quest": 15500, - "eh": 15501, - "optimal": 15502, - "zhu": 15503, - "raped": 15504, - "greens": 15505, - "presided": 15506, - "brenda": 15507, - "worries": 15508, - "##life": 15509, - "venetian": 15510, - "marxist": 15511, - "turnout": 15512, - "##lius": 15513, - "refined": 15514, - "braced": 15515, - "sins": 15516, - "grasped": 15517, - "sunderland": 15518, - "nickel": 15519, - "speculated": 15520, - "lowell": 15521, - "cyrillic": 15522, - "communism": 15523, - "fundraising": 15524, - "resembling": 15525, - "colonists": 15526, - "mutant": 15527, - "freddie": 15528, - "usc": 15529, - "##mos": 15530, - "gratitude": 15531, - "##run": 15532, - "mural": 15533, - "##lous": 15534, - "chemist": 15535, - "wi": 15536, - "reminds": 15537, - "28th": 15538, - "steals": 15539, - "tess": 15540, - "pietro": 15541, - "##ingen": 15542, - "promoter": 15543, - "ri": 15544, - "microphone": 15545, - "honoured": 15546, - "rai": 15547, - "sant": 15548, - "##qui": 15549, - "feather": 15550, - "##nson": 15551, - "burlington": 15552, - "kurdish": 15553, - "terrorists": 15554, - "deborah": 15555, - "sickness": 15556, - "##wed": 15557, - "##eet": 15558, - "hazard": 15559, - "irritated": 15560, - "desperation": 15561, - "veil": 15562, - "clarity": 15563, - "##rik": 15564, - "jewels": 15565, - "xv": 15566, - "##gged": 15567, - "##ows": 15568, - "##cup": 15569, - "berkshire": 15570, - "unfair": 15571, - "mysteries": 15572, - "orchid": 15573, - "winced": 15574, - "exhaustion": 15575, - "renovations": 15576, - "stranded": 15577, - "obe": 15578, - "infinity": 15579, - "##nies": 15580, - "adapt": 15581, - "redevelopment": 15582, - "thanked": 15583, - "registry": 15584, - "olga": 15585, - "domingo": 15586, - "noir": 15587, - "tudor": 15588, - "ole": 15589, - "##atus": 15590, - "commenting": 15591, - "behaviors": 15592, - "##ais": 15593, - "crisp": 15594, - "pauline": 15595, - "probable": 15596, - "stirling": 15597, - "wigan": 15598, - "##bian": 15599, - "paralympics": 15600, - "panting": 15601, - "surpassed": 15602, - "##rew": 15603, - "luca": 15604, - "barred": 15605, - "pony": 15606, - "famed": 15607, - "##sters": 15608, - "cassandra": 15609, - "waiter": 15610, - "carolyn": 15611, - "exported": 15612, - "##orted": 15613, - "andres": 15614, - "destructive": 15615, - "deeds": 15616, - "jonah": 15617, - "castles": 15618, - "vacancy": 15619, - "suv": 15620, - "##glass": 15621, - "1788": 15622, - "orchard": 15623, - "yep": 15624, - "famine": 15625, - "belarusian": 15626, - "sprang": 15627, - "##forth": 15628, - "skinny": 15629, - "##mis": 15630, - "administrators": 15631, - "rotterdam": 15632, - "zambia": 15633, - "zhao": 15634, - "boiler": 15635, - "discoveries": 15636, - 
"##ride": 15637, - "##physics": 15638, - "lucius": 15639, - "disappointing": 15640, - "outreach": 15641, - "spoon": 15642, - "##frame": 15643, - "qualifications": 15644, - "unanimously": 15645, - "enjoys": 15646, - "regency": 15647, - "##iidae": 15648, - "stade": 15649, - "realism": 15650, - "veterinary": 15651, - "rodgers": 15652, - "dump": 15653, - "alain": 15654, - "chestnut": 15655, - "castile": 15656, - "censorship": 15657, - "rumble": 15658, - "gibbs": 15659, - "##itor": 15660, - "communion": 15661, - "reggae": 15662, - "inactivated": 15663, - "logs": 15664, - "loads": 15665, - "##houses": 15666, - "homosexual": 15667, - "##iano": 15668, - "ale": 15669, - "informs": 15670, - "##cas": 15671, - "phrases": 15672, - "plaster": 15673, - "linebacker": 15674, - "ambrose": 15675, - "kaiser": 15676, - "fascinated": 15677, - "850": 15678, - "limerick": 15679, - "recruitment": 15680, - "forge": 15681, - "mastered": 15682, - "##nding": 15683, - "leinster": 15684, - "rooted": 15685, - "threaten": 15686, - "##strom": 15687, - "borneo": 15688, - "##hes": 15689, - "suggestions": 15690, - "scholarships": 15691, - "propeller": 15692, - "documentaries": 15693, - "patronage": 15694, - "coats": 15695, - "constructing": 15696, - "invest": 15697, - "neurons": 15698, - "comet": 15699, - "entirety": 15700, - "shouts": 15701, - "identities": 15702, - "annoying": 15703, - "unchanged": 15704, - "wary": 15705, - "##antly": 15706, - "##ogy": 15707, - "neat": 15708, - "oversight": 15709, - "##kos": 15710, - "phillies": 15711, - "replay": 15712, - "constance": 15713, - "##kka": 15714, - "incarnation": 15715, - "humble": 15716, - "skies": 15717, - "minus": 15718, - "##acy": 15719, - "smithsonian": 15720, - "##chel": 15721, - "guerrilla": 15722, - "jar": 15723, - "cadets": 15724, - "##plate": 15725, - "surplus": 15726, - "audit": 15727, - "##aru": 15728, - "cracking": 15729, - "joanna": 15730, - "louisa": 15731, - "pacing": 15732, - "##lights": 15733, - "intentionally": 15734, - "##iri": 15735, - "diner": 15736, - "nwa": 15737, - "imprint": 15738, - "australians": 15739, - "tong": 15740, - "unprecedented": 15741, - "bunker": 15742, - "naive": 15743, - "specialists": 15744, - "ark": 15745, - "nichols": 15746, - "railing": 15747, - "leaked": 15748, - "pedal": 15749, - "##uka": 15750, - "shrub": 15751, - "longing": 15752, - "roofs": 15753, - "v8": 15754, - "captains": 15755, - "neural": 15756, - "tuned": 15757, - "##ntal": 15758, - "##jet": 15759, - "emission": 15760, - "medina": 15761, - "frantic": 15762, - "codex": 15763, - "definitive": 15764, - "sid": 15765, - "abolition": 15766, - "intensified": 15767, - "stocks": 15768, - "enrique": 15769, - "sustain": 15770, - "genoa": 15771, - "oxide": 15772, - "##written": 15773, - "clues": 15774, - "cha": 15775, - "##gers": 15776, - "tributaries": 15777, - "fragment": 15778, - "venom": 15779, - "##rity": 15780, - "##ente": 15781, - "##sca": 15782, - "muffled": 15783, - "vain": 15784, - "sire": 15785, - "laos": 15786, - "##ingly": 15787, - "##hana": 15788, - "hastily": 15789, - "snapping": 15790, - "surfaced": 15791, - "sentiment": 15792, - "motive": 15793, - "##oft": 15794, - "contests": 15795, - "approximate": 15796, - "mesa": 15797, - "luckily": 15798, - "dinosaur": 15799, - "exchanges": 15800, - "propelled": 15801, - "accord": 15802, - "bourne": 15803, - "relieve": 15804, - "tow": 15805, - "masks": 15806, - "offended": 15807, - "##ues": 15808, - "cynthia": 15809, - "##mmer": 15810, - "rains": 15811, - "bartender": 15812, - "zinc": 15813, - "reviewers": 15814, - "lois": 
15815, - "##sai": 15816, - "legged": 15817, - "arrogant": 15818, - "rafe": 15819, - "rosie": 15820, - "comprise": 15821, - "handicap": 15822, - "blockade": 15823, - "inlet": 15824, - "lagoon": 15825, - "copied": 15826, - "drilling": 15827, - "shelley": 15828, - "petals": 15829, - "##inian": 15830, - "mandarin": 15831, - "obsolete": 15832, - "##inated": 15833, - "onward": 15834, - "arguably": 15835, - "productivity": 15836, - "cindy": 15837, - "praising": 15838, - "seldom": 15839, - "busch": 15840, - "discusses": 15841, - "raleigh": 15842, - "shortage": 15843, - "ranged": 15844, - "stanton": 15845, - "encouragement": 15846, - "firstly": 15847, - "conceded": 15848, - "overs": 15849, - "temporal": 15850, - "##uke": 15851, - "cbe": 15852, - "##bos": 15853, - "woo": 15854, - "certainty": 15855, - "pumps": 15856, - "##pton": 15857, - "stalked": 15858, - "##uli": 15859, - "lizzie": 15860, - "periodic": 15861, - "thieves": 15862, - "weaker": 15863, - "##night": 15864, - "gases": 15865, - "shoving": 15866, - "chooses": 15867, - "wc": 15868, - "##chemical": 15869, - "prompting": 15870, - "weights": 15871, - "##kill": 15872, - "robust": 15873, - "flanked": 15874, - "sticky": 15875, - "hu": 15876, - "tuberculosis": 15877, - "##eb": 15878, - "##eal": 15879, - "christchurch": 15880, - "resembled": 15881, - "wallet": 15882, - "reese": 15883, - "inappropriate": 15884, - "pictured": 15885, - "distract": 15886, - "fixing": 15887, - "fiddle": 15888, - "giggled": 15889, - "burger": 15890, - "heirs": 15891, - "hairy": 15892, - "mechanic": 15893, - "torque": 15894, - "apache": 15895, - "obsessed": 15896, - "chiefly": 15897, - "cheng": 15898, - "logging": 15899, - "##tag": 15900, - "extracted": 15901, - "meaningful": 15902, - "numb": 15903, - "##vsky": 15904, - "gloucestershire": 15905, - "reminding": 15906, - "##bay": 15907, - "unite": 15908, - "##lit": 15909, - "breeds": 15910, - "diminished": 15911, - "clown": 15912, - "glove": 15913, - "1860s": 15914, - "##ن": 15915, - "##ug": 15916, - "archibald": 15917, - "focal": 15918, - "freelance": 15919, - "sliced": 15920, - "depiction": 15921, - "##yk": 15922, - "organism": 15923, - "switches": 15924, - "sights": 15925, - "stray": 15926, - "crawling": 15927, - "##ril": 15928, - "lever": 15929, - "leningrad": 15930, - "interpretations": 15931, - "loops": 15932, - "anytime": 15933, - "reel": 15934, - "alicia": 15935, - "delighted": 15936, - "##ech": 15937, - "inhaled": 15938, - "xiv": 15939, - "suitcase": 15940, - "bernie": 15941, - "vega": 15942, - "licenses": 15943, - "northampton": 15944, - "exclusion": 15945, - "induction": 15946, - "monasteries": 15947, - "racecourse": 15948, - "homosexuality": 15949, - "##right": 15950, - "##sfield": 15951, - "##rky": 15952, - "dimitri": 15953, - "michele": 15954, - "alternatives": 15955, - "ions": 15956, - "commentators": 15957, - "genuinely": 15958, - "objected": 15959, - "pork": 15960, - "hospitality": 15961, - "fencing": 15962, - "stephan": 15963, - "warships": 15964, - "peripheral": 15965, - "wit": 15966, - "drunken": 15967, - "wrinkled": 15968, - "quentin": 15969, - "spends": 15970, - "departing": 15971, - "chung": 15972, - "numerical": 15973, - "spokesperson": 15974, - "##zone": 15975, - "johannesburg": 15976, - "caliber": 15977, - "killers": 15978, - "##udge": 15979, - "assumes": 15980, - "neatly": 15981, - "demographic": 15982, - "abigail": 15983, - "bloc": 15984, - "##vel": 15985, - "mounting": 15986, - "##lain": 15987, - "bentley": 15988, - "slightest": 15989, - "xu": 15990, - "recipients": 15991, - "##jk": 15992, - 
"merlin": 15993, - "##writer": 15994, - "seniors": 15995, - "prisons": 15996, - "blinking": 15997, - "hindwings": 15998, - "flickered": 15999, - "kappa": 16000, - "##hel": 16001, - "80s": 16002, - "strengthening": 16003, - "appealing": 16004, - "brewing": 16005, - "gypsy": 16006, - "mali": 16007, - "lashes": 16008, - "hulk": 16009, - "unpleasant": 16010, - "harassment": 16011, - "bio": 16012, - "treaties": 16013, - "predict": 16014, - "instrumentation": 16015, - "pulp": 16016, - "troupe": 16017, - "boiling": 16018, - "mantle": 16019, - "##ffe": 16020, - "ins": 16021, - "##vn": 16022, - "dividing": 16023, - "handles": 16024, - "verbs": 16025, - "##onal": 16026, - "coconut": 16027, - "senegal": 16028, - "340": 16029, - "thorough": 16030, - "gum": 16031, - "momentarily": 16032, - "##sto": 16033, - "cocaine": 16034, - "panicked": 16035, - "destined": 16036, - "##turing": 16037, - "teatro": 16038, - "denying": 16039, - "weary": 16040, - "captained": 16041, - "mans": 16042, - "##hawks": 16043, - "##code": 16044, - "wakefield": 16045, - "bollywood": 16046, - "thankfully": 16047, - "##16": 16048, - "cyril": 16049, - "##wu": 16050, - "amendments": 16051, - "##bahn": 16052, - "consultation": 16053, - "stud": 16054, - "reflections": 16055, - "kindness": 16056, - "1787": 16057, - "internally": 16058, - "##ovo": 16059, - "tex": 16060, - "mosaic": 16061, - "distribute": 16062, - "paddy": 16063, - "seeming": 16064, - "143": 16065, - "##hic": 16066, - "piers": 16067, - "##15": 16068, - "##mura": 16069, - "##verse": 16070, - "popularly": 16071, - "winger": 16072, - "kang": 16073, - "sentinel": 16074, - "mccoy": 16075, - "##anza": 16076, - "covenant": 16077, - "##bag": 16078, - "verge": 16079, - "fireworks": 16080, - "suppress": 16081, - "thrilled": 16082, - "dominate": 16083, - "##jar": 16084, - "swansea": 16085, - "##60": 16086, - "142": 16087, - "reconciliation": 16088, - "##ndi": 16089, - "stiffened": 16090, - "cue": 16091, - "dorian": 16092, - "##uf": 16093, - "damascus": 16094, - "amor": 16095, - "ida": 16096, - "foremost": 16097, - "##aga": 16098, - "porsche": 16099, - "unseen": 16100, - "dir": 16101, - "##had": 16102, - "##azi": 16103, - "stony": 16104, - "lexi": 16105, - "melodies": 16106, - "##nko": 16107, - "angular": 16108, - "integer": 16109, - "podcast": 16110, - "ants": 16111, - "inherent": 16112, - "jaws": 16113, - "justify": 16114, - "persona": 16115, - "##olved": 16116, - "josephine": 16117, - "##nr": 16118, - "##ressed": 16119, - "customary": 16120, - "flashes": 16121, - "gala": 16122, - "cyrus": 16123, - "glaring": 16124, - "backyard": 16125, - "ariel": 16126, - "physiology": 16127, - "greenland": 16128, - "html": 16129, - "stir": 16130, - "avon": 16131, - "atletico": 16132, - "finch": 16133, - "methodology": 16134, - "ked": 16135, - "##lent": 16136, - "mas": 16137, - "catholicism": 16138, - "townsend": 16139, - "branding": 16140, - "quincy": 16141, - "fits": 16142, - "containers": 16143, - "1777": 16144, - "ashore": 16145, - "aragon": 16146, - "##19": 16147, - "forearm": 16148, - "poisoning": 16149, - "##sd": 16150, - "adopting": 16151, - "conquer": 16152, - "grinding": 16153, - "amnesty": 16154, - "keller": 16155, - "finances": 16156, - "evaluate": 16157, - "forged": 16158, - "lankan": 16159, - "instincts": 16160, - "##uto": 16161, - "guam": 16162, - "bosnian": 16163, - "photographed": 16164, - "workplace": 16165, - "desirable": 16166, - "protector": 16167, - "##dog": 16168, - "allocation": 16169, - "intently": 16170, - "encourages": 16171, - "willy": 16172, - "##sten": 16173, - 
"bodyguard": 16174, - "electro": 16175, - "brighter": 16176, - "##ν": 16177, - "bihar": 16178, - "##chev": 16179, - "lasts": 16180, - "opener": 16181, - "amphibious": 16182, - "sal": 16183, - "verde": 16184, - "arte": 16185, - "##cope": 16186, - "captivity": 16187, - "vocabulary": 16188, - "yields": 16189, - "##tted": 16190, - "agreeing": 16191, - "desmond": 16192, - "pioneered": 16193, - "##chus": 16194, - "strap": 16195, - "campaigned": 16196, - "railroads": 16197, - "##ович": 16198, - "emblem": 16199, - "##dre": 16200, - "stormed": 16201, - "501": 16202, - "##ulous": 16203, - "marijuana": 16204, - "northumberland": 16205, - "##gn": 16206, - "##nath": 16207, - "bowen": 16208, - "landmarks": 16209, - "beaumont": 16210, - "##qua": 16211, - "danube": 16212, - "##bler": 16213, - "attorneys": 16214, - "th": 16215, - "ge": 16216, - "flyers": 16217, - "critique": 16218, - "villains": 16219, - "cass": 16220, - "mutation": 16221, - "acc": 16222, - "##0s": 16223, - "colombo": 16224, - "mckay": 16225, - "motif": 16226, - "sampling": 16227, - "concluding": 16228, - "syndicate": 16229, - "##rell": 16230, - "neon": 16231, - "stables": 16232, - "ds": 16233, - "warnings": 16234, - "clint": 16235, - "mourning": 16236, - "wilkinson": 16237, - "##tated": 16238, - "merrill": 16239, - "leopard": 16240, - "evenings": 16241, - "exhaled": 16242, - "emil": 16243, - "sonia": 16244, - "ezra": 16245, - "discrete": 16246, - "stove": 16247, - "farrell": 16248, - "fifteenth": 16249, - "prescribed": 16250, - "superhero": 16251, - "##rier": 16252, - "worms": 16253, - "helm": 16254, - "wren": 16255, - "##duction": 16256, - "##hc": 16257, - "expo": 16258, - "##rator": 16259, - "hq": 16260, - "unfamiliar": 16261, - "antony": 16262, - "prevents": 16263, - "acceleration": 16264, - "fiercely": 16265, - "mari": 16266, - "painfully": 16267, - "calculations": 16268, - "cheaper": 16269, - "ign": 16270, - "clifton": 16271, - "irvine": 16272, - "davenport": 16273, - "mozambique": 16274, - "##np": 16275, - "pierced": 16276, - "##evich": 16277, - "wonders": 16278, - "##wig": 16279, - "##cate": 16280, - "##iling": 16281, - "crusade": 16282, - "ware": 16283, - "##uel": 16284, - "enzymes": 16285, - "reasonably": 16286, - "mls": 16287, - "##coe": 16288, - "mater": 16289, - "ambition": 16290, - "bunny": 16291, - "eliot": 16292, - "kernel": 16293, - "##fin": 16294, - "asphalt": 16295, - "headmaster": 16296, - "torah": 16297, - "aden": 16298, - "lush": 16299, - "pins": 16300, - "waived": 16301, - "##care": 16302, - "##yas": 16303, - "joao": 16304, - "substrate": 16305, - "enforce": 16306, - "##grad": 16307, - "##ules": 16308, - "alvarez": 16309, - "selections": 16310, - "epidemic": 16311, - "tempted": 16312, - "##bit": 16313, - "bremen": 16314, - "translates": 16315, - "ensured": 16316, - "waterfront": 16317, - "29th": 16318, - "forrest": 16319, - "manny": 16320, - "malone": 16321, - "kramer": 16322, - "reigning": 16323, - "cookies": 16324, - "simpler": 16325, - "absorption": 16326, - "205": 16327, - "engraved": 16328, - "##ffy": 16329, - "evaluated": 16330, - "1778": 16331, - "haze": 16332, - "146": 16333, - "comforting": 16334, - "crossover": 16335, - "##abe": 16336, - "thorn": 16337, - "##rift": 16338, - "##imo": 16339, - "##pop": 16340, - "suppression": 16341, - "fatigue": 16342, - "cutter": 16343, - "##tr": 16344, - "201": 16345, - "wurttemberg": 16346, - "##orf": 16347, - "enforced": 16348, - "hovering": 16349, - "proprietary": 16350, - "gb": 16351, - "samurai": 16352, - "syllable": 16353, - "ascent": 16354, - "lacey": 16355, - 
"tick": 16356, - "lars": 16357, - "tractor": 16358, - "merchandise": 16359, - "rep": 16360, - "bouncing": 16361, - "defendants": 16362, - "##yre": 16363, - "huntington": 16364, - "##ground": 16365, - "##oko": 16366, - "standardized": 16367, - "##hor": 16368, - "##hima": 16369, - "assassinated": 16370, - "nu": 16371, - "predecessors": 16372, - "rainy": 16373, - "liar": 16374, - "assurance": 16375, - "lyrical": 16376, - "##uga": 16377, - "secondly": 16378, - "flattened": 16379, - "ios": 16380, - "parameter": 16381, - "undercover": 16382, - "##mity": 16383, - "bordeaux": 16384, - "punish": 16385, - "ridges": 16386, - "markers": 16387, - "exodus": 16388, - "inactive": 16389, - "hesitate": 16390, - "debbie": 16391, - "nyc": 16392, - "pledge": 16393, - "savoy": 16394, - "nagar": 16395, - "offset": 16396, - "organist": 16397, - "##tium": 16398, - "hesse": 16399, - "marin": 16400, - "converting": 16401, - "##iver": 16402, - "diagram": 16403, - "propulsion": 16404, - "pu": 16405, - "validity": 16406, - "reverted": 16407, - "supportive": 16408, - "##dc": 16409, - "ministries": 16410, - "clans": 16411, - "responds": 16412, - "proclamation": 16413, - "##inae": 16414, - "##ø": 16415, - "##rea": 16416, - "ein": 16417, - "pleading": 16418, - "patriot": 16419, - "sf": 16420, - "birch": 16421, - "islanders": 16422, - "strauss": 16423, - "hates": 16424, - "##dh": 16425, - "brandenburg": 16426, - "concession": 16427, - "rd": 16428, - "##ob": 16429, - "1900s": 16430, - "killings": 16431, - "textbook": 16432, - "antiquity": 16433, - "cinematography": 16434, - "wharf": 16435, - "embarrassing": 16436, - "setup": 16437, - "creed": 16438, - "farmland": 16439, - "inequality": 16440, - "centred": 16441, - "signatures": 16442, - "fallon": 16443, - "370": 16444, - "##ingham": 16445, - "##uts": 16446, - "ceylon": 16447, - "gazing": 16448, - "directive": 16449, - "laurie": 16450, - "##tern": 16451, - "globally": 16452, - "##uated": 16453, - "##dent": 16454, - "allah": 16455, - "excavation": 16456, - "threads": 16457, - "##cross": 16458, - "148": 16459, - "frantically": 16460, - "icc": 16461, - "utilize": 16462, - "determines": 16463, - "respiratory": 16464, - "thoughtful": 16465, - "receptions": 16466, - "##dicate": 16467, - "merging": 16468, - "chandra": 16469, - "seine": 16470, - "147": 16471, - "builders": 16472, - "builds": 16473, - "diagnostic": 16474, - "dev": 16475, - "visibility": 16476, - "goddamn": 16477, - "analyses": 16478, - "dhaka": 16479, - "cho": 16480, - "proves": 16481, - "chancel": 16482, - "concurrent": 16483, - "curiously": 16484, - "canadians": 16485, - "pumped": 16486, - "restoring": 16487, - "1850s": 16488, - "turtles": 16489, - "jaguar": 16490, - "sinister": 16491, - "spinal": 16492, - "traction": 16493, - "declan": 16494, - "vows": 16495, - "1784": 16496, - "glowed": 16497, - "capitalism": 16498, - "swirling": 16499, - "install": 16500, - "universidad": 16501, - "##lder": 16502, - "##oat": 16503, - "soloist": 16504, - "##genic": 16505, - "##oor": 16506, - "coincidence": 16507, - "beginnings": 16508, - "nissan": 16509, - "dip": 16510, - "resorts": 16511, - "caucasus": 16512, - "combustion": 16513, - "infectious": 16514, - "##eno": 16515, - "pigeon": 16516, - "serpent": 16517, - "##itating": 16518, - "conclude": 16519, - "masked": 16520, - "salad": 16521, - "jew": 16522, - "##gr": 16523, - "surreal": 16524, - "toni": 16525, - "##wc": 16526, - "harmonica": 16527, - "151": 16528, - "##gins": 16529, - "##etic": 16530, - "##coat": 16531, - "fishermen": 16532, - "intending": 16533, - "bravery": 16534, 
- "##wave": 16535, - "klaus": 16536, - "titan": 16537, - "wembley": 16538, - "taiwanese": 16539, - "ransom": 16540, - "40th": 16541, - "incorrect": 16542, - "hussein": 16543, - "eyelids": 16544, - "jp": 16545, - "cooke": 16546, - "dramas": 16547, - "utilities": 16548, - "##etta": 16549, - "##print": 16550, - "eisenhower": 16551, - "principally": 16552, - "granada": 16553, - "lana": 16554, - "##rak": 16555, - "openings": 16556, - "concord": 16557, - "##bl": 16558, - "bethany": 16559, - "connie": 16560, - "morality": 16561, - "sega": 16562, - "##mons": 16563, - "##nard": 16564, - "earnings": 16565, - "##kara": 16566, - "##cine": 16567, - "wii": 16568, - "communes": 16569, - "##rel": 16570, - "coma": 16571, - "composing": 16572, - "softened": 16573, - "severed": 16574, - "grapes": 16575, - "##17": 16576, - "nguyen": 16577, - "analyzed": 16578, - "warlord": 16579, - "hubbard": 16580, - "heavenly": 16581, - "behave": 16582, - "slovenian": 16583, - "##hit": 16584, - "##ony": 16585, - "hailed": 16586, - "filmmakers": 16587, - "trance": 16588, - "caldwell": 16589, - "skye": 16590, - "unrest": 16591, - "coward": 16592, - "likelihood": 16593, - "##aging": 16594, - "bern": 16595, - "sci": 16596, - "taliban": 16597, - "honolulu": 16598, - "propose": 16599, - "##wang": 16600, - "1700": 16601, - "browser": 16602, - "imagining": 16603, - "cobra": 16604, - "contributes": 16605, - "dukes": 16606, - "instinctively": 16607, - "conan": 16608, - "violinist": 16609, - "##ores": 16610, - "accessories": 16611, - "gradual": 16612, - "##amp": 16613, - "quotes": 16614, - "sioux": 16615, - "##dating": 16616, - "undertake": 16617, - "intercepted": 16618, - "sparkling": 16619, - "compressed": 16620, - "139": 16621, - "fungus": 16622, - "tombs": 16623, - "haley": 16624, - "imposing": 16625, - "rests": 16626, - "degradation": 16627, - "lincolnshire": 16628, - "retailers": 16629, - "wetlands": 16630, - "tulsa": 16631, - "distributor": 16632, - "dungeon": 16633, - "nun": 16634, - "greenhouse": 16635, - "convey": 16636, - "atlantis": 16637, - "aft": 16638, - "exits": 16639, - "oman": 16640, - "dresser": 16641, - "lyons": 16642, - "##sti": 16643, - "joking": 16644, - "eddy": 16645, - "judgement": 16646, - "omitted": 16647, - "digits": 16648, - "##cts": 16649, - "##game": 16650, - "juniors": 16651, - "##rae": 16652, - "cents": 16653, - "stricken": 16654, - "une": 16655, - "##ngo": 16656, - "wizards": 16657, - "weir": 16658, - "breton": 16659, - "nan": 16660, - "technician": 16661, - "fibers": 16662, - "liking": 16663, - "royalty": 16664, - "##cca": 16665, - "154": 16666, - "persia": 16667, - "terribly": 16668, - "magician": 16669, - "##rable": 16670, - "##unt": 16671, - "vance": 16672, - "cafeteria": 16673, - "booker": 16674, - "camille": 16675, - "warmer": 16676, - "##static": 16677, - "consume": 16678, - "cavern": 16679, - "gaps": 16680, - "compass": 16681, - "contemporaries": 16682, - "foyer": 16683, - "soothing": 16684, - "graveyard": 16685, - "maj": 16686, - "plunged": 16687, - "blush": 16688, - "##wear": 16689, - "cascade": 16690, - "demonstrates": 16691, - "ordinance": 16692, - "##nov": 16693, - "boyle": 16694, - "##lana": 16695, - "rockefeller": 16696, - "shaken": 16697, - "banjo": 16698, - "izzy": 16699, - "##ense": 16700, - "breathless": 16701, - "vines": 16702, - "##32": 16703, - "##eman": 16704, - "alterations": 16705, - "chromosome": 16706, - "dwellings": 16707, - "feudal": 16708, - "mole": 16709, - "153": 16710, - "catalonia": 16711, - "relics": 16712, - "tenant": 16713, - "mandated": 16714, - "##fm": 16715, - 
"fridge": 16716, - "hats": 16717, - "honesty": 16718, - "patented": 16719, - "raul": 16720, - "heap": 16721, - "cruisers": 16722, - "accusing": 16723, - "enlightenment": 16724, - "infants": 16725, - "wherein": 16726, - "chatham": 16727, - "contractors": 16728, - "zen": 16729, - "affinity": 16730, - "hc": 16731, - "osborne": 16732, - "piston": 16733, - "156": 16734, - "traps": 16735, - "maturity": 16736, - "##rana": 16737, - "lagos": 16738, - "##zal": 16739, - "peering": 16740, - "##nay": 16741, - "attendant": 16742, - "dealers": 16743, - "protocols": 16744, - "subset": 16745, - "prospects": 16746, - "biographical": 16747, - "##cre": 16748, - "artery": 16749, - "##zers": 16750, - "insignia": 16751, - "nuns": 16752, - "endured": 16753, - "##eration": 16754, - "recommend": 16755, - "schwartz": 16756, - "serbs": 16757, - "berger": 16758, - "cromwell": 16759, - "crossroads": 16760, - "##ctor": 16761, - "enduring": 16762, - "clasped": 16763, - "grounded": 16764, - "##bine": 16765, - "marseille": 16766, - "twitched": 16767, - "abel": 16768, - "choke": 16769, - "https": 16770, - "catalyst": 16771, - "moldova": 16772, - "italians": 16773, - "##tist": 16774, - "disastrous": 16775, - "wee": 16776, - "##oured": 16777, - "##nti": 16778, - "wwf": 16779, - "nope": 16780, - "##piration": 16781, - "##asa": 16782, - "expresses": 16783, - "thumbs": 16784, - "167": 16785, - "##nza": 16786, - "coca": 16787, - "1781": 16788, - "cheating": 16789, - "##ption": 16790, - "skipped": 16791, - "sensory": 16792, - "heidelberg": 16793, - "spies": 16794, - "satan": 16795, - "dangers": 16796, - "semifinal": 16797, - "202": 16798, - "bohemia": 16799, - "whitish": 16800, - "confusing": 16801, - "shipbuilding": 16802, - "relies": 16803, - "surgeons": 16804, - "landings": 16805, - "ravi": 16806, - "baku": 16807, - "moor": 16808, - "suffix": 16809, - "alejandro": 16810, - "##yana": 16811, - "litre": 16812, - "upheld": 16813, - "##unk": 16814, - "rajasthan": 16815, - "##rek": 16816, - "coaster": 16817, - "insists": 16818, - "posture": 16819, - "scenarios": 16820, - "etienne": 16821, - "favoured": 16822, - "appoint": 16823, - "transgender": 16824, - "elephants": 16825, - "poked": 16826, - "greenwood": 16827, - "defences": 16828, - "fulfilled": 16829, - "militant": 16830, - "somali": 16831, - "1758": 16832, - "chalk": 16833, - "potent": 16834, - "##ucci": 16835, - "migrants": 16836, - "wink": 16837, - "assistants": 16838, - "nos": 16839, - "restriction": 16840, - "activism": 16841, - "niger": 16842, - "##ario": 16843, - "colon": 16844, - "shaun": 16845, - "##sat": 16846, - "daphne": 16847, - "##erated": 16848, - "swam": 16849, - "congregations": 16850, - "reprise": 16851, - "considerations": 16852, - "magnet": 16853, - "playable": 16854, - "xvi": 16855, - "##р": 16856, - "overthrow": 16857, - "tobias": 16858, - "knob": 16859, - "chavez": 16860, - "coding": 16861, - "##mers": 16862, - "propped": 16863, - "katrina": 16864, - "orient": 16865, - "newcomer": 16866, - "##suke": 16867, - "temperate": 16868, - "##pool": 16869, - "farmhouse": 16870, - "interrogation": 16871, - "##vd": 16872, - "committing": 16873, - "##vert": 16874, - "forthcoming": 16875, - "strawberry": 16876, - "joaquin": 16877, - "macau": 16878, - "ponds": 16879, - "shocking": 16880, - "siberia": 16881, - "##cellular": 16882, - "chant": 16883, - "contributors": 16884, - "##nant": 16885, - "##ologists": 16886, - "sped": 16887, - "absorb": 16888, - "hail": 16889, - "1782": 16890, - "spared": 16891, - "##hore": 16892, - "barbados": 16893, - "karate": 16894, - "opus": 
16895, - "originates": 16896, - "saul": 16897, - "##xie": 16898, - "evergreen": 16899, - "leaped": 16900, - "##rock": 16901, - "correlation": 16902, - "exaggerated": 16903, - "weekday": 16904, - "unification": 16905, - "bump": 16906, - "tracing": 16907, - "brig": 16908, - "afb": 16909, - "pathways": 16910, - "utilizing": 16911, - "##ners": 16912, - "mod": 16913, - "mb": 16914, - "disturbance": 16915, - "kneeling": 16916, - "##stad": 16917, - "##guchi": 16918, - "100th": 16919, - "pune": 16920, - "##thy": 16921, - "decreasing": 16922, - "168": 16923, - "manipulation": 16924, - "miriam": 16925, - "academia": 16926, - "ecosystem": 16927, - "occupational": 16928, - "rbi": 16929, - "##lem": 16930, - "rift": 16931, - "##14": 16932, - "rotary": 16933, - "stacked": 16934, - "incorporation": 16935, - "awakening": 16936, - "generators": 16937, - "guerrero": 16938, - "racist": 16939, - "##omy": 16940, - "cyber": 16941, - "derivatives": 16942, - "culminated": 16943, - "allie": 16944, - "annals": 16945, - "panzer": 16946, - "sainte": 16947, - "wikipedia": 16948, - "pops": 16949, - "zu": 16950, - "austro": 16951, - "##vate": 16952, - "algerian": 16953, - "politely": 16954, - "nicholson": 16955, - "mornings": 16956, - "educate": 16957, - "tastes": 16958, - "thrill": 16959, - "dartmouth": 16960, - "##gating": 16961, - "db": 16962, - "##jee": 16963, - "regan": 16964, - "differing": 16965, - "concentrating": 16966, - "choreography": 16967, - "divinity": 16968, - "##media": 16969, - "pledged": 16970, - "alexandre": 16971, - "routing": 16972, - "gregor": 16973, - "madeline": 16974, - "##idal": 16975, - "apocalypse": 16976, - "##hora": 16977, - "gunfire": 16978, - "culminating": 16979, - "elves": 16980, - "fined": 16981, - "liang": 16982, - "lam": 16983, - "programmed": 16984, - "tar": 16985, - "guessing": 16986, - "transparency": 16987, - "gabrielle": 16988, - "##gna": 16989, - "cancellation": 16990, - "flexibility": 16991, - "##lining": 16992, - "accession": 16993, - "shea": 16994, - "stronghold": 16995, - "nets": 16996, - "specializes": 16997, - "##rgan": 16998, - "abused": 16999, - "hasan": 17000, - "sgt": 17001, - "ling": 17002, - "exceeding": 17003, - "##₄": 17004, - "admiration": 17005, - "supermarket": 17006, - "##ark": 17007, - "photographers": 17008, - "specialised": 17009, - "tilt": 17010, - "resonance": 17011, - "hmm": 17012, - "perfume": 17013, - "380": 17014, - "sami": 17015, - "threatens": 17016, - "garland": 17017, - "botany": 17018, - "guarding": 17019, - "boiled": 17020, - "greet": 17021, - "puppy": 17022, - "russo": 17023, - "supplier": 17024, - "wilmington": 17025, - "vibrant": 17026, - "vijay": 17027, - "##bius": 17028, - "paralympic": 17029, - "grumbled": 17030, - "paige": 17031, - "faa": 17032, - "licking": 17033, - "margins": 17034, - "hurricanes": 17035, - "##gong": 17036, - "fest": 17037, - "grenade": 17038, - "ripping": 17039, - "##uz": 17040, - "counseling": 17041, - "weigh": 17042, - "##sian": 17043, - "needles": 17044, - "wiltshire": 17045, - "edison": 17046, - "costly": 17047, - "##not": 17048, - "fulton": 17049, - "tramway": 17050, - "redesigned": 17051, - "staffordshire": 17052, - "cache": 17053, - "gasping": 17054, - "watkins": 17055, - "sleepy": 17056, - "candidacy": 17057, - "##group": 17058, - "monkeys": 17059, - "timeline": 17060, - "throbbing": 17061, - "##bid": 17062, - "##sos": 17063, - "berth": 17064, - "uzbekistan": 17065, - "vanderbilt": 17066, - "bothering": 17067, - "overturned": 17068, - "ballots": 17069, - "gem": 17070, - "##iger": 17071, - "sunglasses": 17072, - 
"subscribers": 17073, - "hooker": 17074, - "compelling": 17075, - "ang": 17076, - "exceptionally": 17077, - "saloon": 17078, - "stab": 17079, - "##rdi": 17080, - "carla": 17081, - "terrifying": 17082, - "rom": 17083, - "##vision": 17084, - "coil": 17085, - "##oids": 17086, - "satisfying": 17087, - "vendors": 17088, - "31st": 17089, - "mackay": 17090, - "deities": 17091, - "overlooked": 17092, - "ambient": 17093, - "bahamas": 17094, - "felipe": 17095, - "olympia": 17096, - "whirled": 17097, - "botanist": 17098, - "advertised": 17099, - "tugging": 17100, - "##dden": 17101, - "disciples": 17102, - "morales": 17103, - "unionist": 17104, - "rites": 17105, - "foley": 17106, - "morse": 17107, - "motives": 17108, - "creepy": 17109, - "##₀": 17110, - "soo": 17111, - "##sz": 17112, - "bargain": 17113, - "highness": 17114, - "frightening": 17115, - "turnpike": 17116, - "tory": 17117, - "reorganization": 17118, - "##cer": 17119, - "depict": 17120, - "biographer": 17121, - "##walk": 17122, - "unopposed": 17123, - "manifesto": 17124, - "##gles": 17125, - "institut": 17126, - "emile": 17127, - "accidental": 17128, - "kapoor": 17129, - "##dam": 17130, - "kilkenny": 17131, - "cortex": 17132, - "lively": 17133, - "##13": 17134, - "romanesque": 17135, - "jain": 17136, - "shan": 17137, - "cannons": 17138, - "##ood": 17139, - "##ske": 17140, - "petrol": 17141, - "echoing": 17142, - "amalgamated": 17143, - "disappears": 17144, - "cautious": 17145, - "proposes": 17146, - "sanctions": 17147, - "trenton": 17148, - "##ر": 17149, - "flotilla": 17150, - "aus": 17151, - "contempt": 17152, - "tor": 17153, - "canary": 17154, - "cote": 17155, - "theirs": 17156, - "##hun": 17157, - "conceptual": 17158, - "deleted": 17159, - "fascinating": 17160, - "paso": 17161, - "blazing": 17162, - "elf": 17163, - "honourable": 17164, - "hutchinson": 17165, - "##eiro": 17166, - "##outh": 17167, - "##zin": 17168, - "surveyor": 17169, - "tee": 17170, - "amidst": 17171, - "wooded": 17172, - "reissue": 17173, - "intro": 17174, - "##ono": 17175, - "cobb": 17176, - "shelters": 17177, - "newsletter": 17178, - "hanson": 17179, - "brace": 17180, - "encoding": 17181, - "confiscated": 17182, - "dem": 17183, - "caravan": 17184, - "marino": 17185, - "scroll": 17186, - "melodic": 17187, - "cows": 17188, - "imam": 17189, - "##adi": 17190, - "##aneous": 17191, - "northward": 17192, - "searches": 17193, - "biodiversity": 17194, - "cora": 17195, - "310": 17196, - "roaring": 17197, - "##bers": 17198, - "connell": 17199, - "theologian": 17200, - "halo": 17201, - "compose": 17202, - "pathetic": 17203, - "unmarried": 17204, - "dynamo": 17205, - "##oot": 17206, - "az": 17207, - "calculation": 17208, - "toulouse": 17209, - "deserves": 17210, - "humour": 17211, - "nr": 17212, - "forgiveness": 17213, - "tam": 17214, - "undergone": 17215, - "martyr": 17216, - "pamela": 17217, - "myths": 17218, - "whore": 17219, - "counselor": 17220, - "hicks": 17221, - "290": 17222, - "heavens": 17223, - "battleship": 17224, - "electromagnetic": 17225, - "##bbs": 17226, - "stellar": 17227, - "establishments": 17228, - "presley": 17229, - "hopped": 17230, - "##chin": 17231, - "temptation": 17232, - "90s": 17233, - "wills": 17234, - "nas": 17235, - "##yuan": 17236, - "nhs": 17237, - "##nya": 17238, - "seminars": 17239, - "##yev": 17240, - "adaptations": 17241, - "gong": 17242, - "asher": 17243, - "lex": 17244, - "indicator": 17245, - "sikh": 17246, - "tobago": 17247, - "cites": 17248, - "goin": 17249, - "##yte": 17250, - "satirical": 17251, - "##gies": 17252, - "characterised": 
17253, - "correspond": 17254, - "bubbles": 17255, - "lure": 17256, - "participates": 17257, - "##vid": 17258, - "eruption": 17259, - "skate": 17260, - "therapeutic": 17261, - "1785": 17262, - "canals": 17263, - "wholesale": 17264, - "defaulted": 17265, - "sac": 17266, - "460": 17267, - "petit": 17268, - "##zzled": 17269, - "virgil": 17270, - "leak": 17271, - "ravens": 17272, - "256": 17273, - "portraying": 17274, - "##yx": 17275, - "ghetto": 17276, - "creators": 17277, - "dams": 17278, - "portray": 17279, - "vicente": 17280, - "##rington": 17281, - "fae": 17282, - "namesake": 17283, - "bounty": 17284, - "##arium": 17285, - "joachim": 17286, - "##ota": 17287, - "##iser": 17288, - "aforementioned": 17289, - "axle": 17290, - "snout": 17291, - "depended": 17292, - "dismantled": 17293, - "reuben": 17294, - "480": 17295, - "##ibly": 17296, - "gallagher": 17297, - "##lau": 17298, - "##pd": 17299, - "earnest": 17300, - "##ieu": 17301, - "##iary": 17302, - "inflicted": 17303, - "objections": 17304, - "##llar": 17305, - "asa": 17306, - "gritted": 17307, - "##athy": 17308, - "jericho": 17309, - "##sea": 17310, - "##was": 17311, - "flick": 17312, - "underside": 17313, - "ceramics": 17314, - "undead": 17315, - "substituted": 17316, - "195": 17317, - "eastward": 17318, - "undoubtedly": 17319, - "wheeled": 17320, - "chimney": 17321, - "##iche": 17322, - "guinness": 17323, - "cb": 17324, - "##ager": 17325, - "siding": 17326, - "##bell": 17327, - "traitor": 17328, - "baptiste": 17329, - "disguised": 17330, - "inauguration": 17331, - "149": 17332, - "tipperary": 17333, - "choreographer": 17334, - "perched": 17335, - "warmed": 17336, - "stationary": 17337, - "eco": 17338, - "##ike": 17339, - "##ntes": 17340, - "bacterial": 17341, - "##aurus": 17342, - "flores": 17343, - "phosphate": 17344, - "##core": 17345, - "attacker": 17346, - "invaders": 17347, - "alvin": 17348, - "intersects": 17349, - "a1": 17350, - "indirectly": 17351, - "immigrated": 17352, - "businessmen": 17353, - "cornelius": 17354, - "valves": 17355, - "narrated": 17356, - "pill": 17357, - "sober": 17358, - "ul": 17359, - "nationale": 17360, - "monastic": 17361, - "applicants": 17362, - "scenery": 17363, - "##jack": 17364, - "161": 17365, - "motifs": 17366, - "constitutes": 17367, - "cpu": 17368, - "##osh": 17369, - "jurisdictions": 17370, - "sd": 17371, - "tuning": 17372, - "irritation": 17373, - "woven": 17374, - "##uddin": 17375, - "fertility": 17376, - "gao": 17377, - "##erie": 17378, - "antagonist": 17379, - "impatient": 17380, - "glacial": 17381, - "hides": 17382, - "boarded": 17383, - "denominations": 17384, - "interception": 17385, - "##jas": 17386, - "cookie": 17387, - "nicola": 17388, - "##tee": 17389, - "algebraic": 17390, - "marquess": 17391, - "bahn": 17392, - "parole": 17393, - "buyers": 17394, - "bait": 17395, - "turbines": 17396, - "paperwork": 17397, - "bestowed": 17398, - "natasha": 17399, - "renee": 17400, - "oceans": 17401, - "purchases": 17402, - "157": 17403, - "vaccine": 17404, - "215": 17405, - "##tock": 17406, - "fixtures": 17407, - "playhouse": 17408, - "integrate": 17409, - "jai": 17410, - "oswald": 17411, - "intellectuals": 17412, - "##cky": 17413, - "booked": 17414, - "nests": 17415, - "mortimer": 17416, - "##isi": 17417, - "obsession": 17418, - "sept": 17419, - "##gler": 17420, - "##sum": 17421, - "440": 17422, - "scrutiny": 17423, - "simultaneous": 17424, - "squinted": 17425, - "##shin": 17426, - "collects": 17427, - "oven": 17428, - "shankar": 17429, - "penned": 17430, - "remarkably": 17431, - "##я": 17432, - 
"slips": 17433, - "luggage": 17434, - "spectral": 17435, - "1786": 17436, - "collaborations": 17437, - "louie": 17438, - "consolidation": 17439, - "##ailed": 17440, - "##ivating": 17441, - "420": 17442, - "hoover": 17443, - "blackpool": 17444, - "harness": 17445, - "ignition": 17446, - "vest": 17447, - "tails": 17448, - "belmont": 17449, - "mongol": 17450, - "skinner": 17451, - "##nae": 17452, - "visually": 17453, - "mage": 17454, - "derry": 17455, - "##tism": 17456, - "##unce": 17457, - "stevie": 17458, - "transitional": 17459, - "##rdy": 17460, - "redskins": 17461, - "drying": 17462, - "prep": 17463, - "prospective": 17464, - "##21": 17465, - "annoyance": 17466, - "oversee": 17467, - "##loaded": 17468, - "fills": 17469, - "##books": 17470, - "##iki": 17471, - "announces": 17472, - "fda": 17473, - "scowled": 17474, - "respects": 17475, - "prasad": 17476, - "mystic": 17477, - "tucson": 17478, - "##vale": 17479, - "revue": 17480, - "springer": 17481, - "bankrupt": 17482, - "1772": 17483, - "aristotle": 17484, - "salvatore": 17485, - "habsburg": 17486, - "##geny": 17487, - "dal": 17488, - "natal": 17489, - "nut": 17490, - "pod": 17491, - "chewing": 17492, - "darts": 17493, - "moroccan": 17494, - "walkover": 17495, - "rosario": 17496, - "lenin": 17497, - "punjabi": 17498, - "##ße": 17499, - "grossed": 17500, - "scattering": 17501, - "wired": 17502, - "invasive": 17503, - "hui": 17504, - "polynomial": 17505, - "corridors": 17506, - "wakes": 17507, - "gina": 17508, - "portrays": 17509, - "##cratic": 17510, - "arid": 17511, - "retreating": 17512, - "erich": 17513, - "irwin": 17514, - "sniper": 17515, - "##dha": 17516, - "linen": 17517, - "lindsey": 17518, - "maneuver": 17519, - "butch": 17520, - "shutting": 17521, - "socio": 17522, - "bounce": 17523, - "commemorative": 17524, - "postseason": 17525, - "jeremiah": 17526, - "pines": 17527, - "275": 17528, - "mystical": 17529, - "beads": 17530, - "bp": 17531, - "abbas": 17532, - "furnace": 17533, - "bidding": 17534, - "consulted": 17535, - "assaulted": 17536, - "empirical": 17537, - "rubble": 17538, - "enclosure": 17539, - "sob": 17540, - "weakly": 17541, - "cancel": 17542, - "polly": 17543, - "yielded": 17544, - "##emann": 17545, - "curly": 17546, - "prediction": 17547, - "battered": 17548, - "70s": 17549, - "vhs": 17550, - "jacqueline": 17551, - "render": 17552, - "sails": 17553, - "barked": 17554, - "detailing": 17555, - "grayson": 17556, - "riga": 17557, - "sloane": 17558, - "raging": 17559, - "##yah": 17560, - "herbs": 17561, - "bravo": 17562, - "##athlon": 17563, - "alloy": 17564, - "giggle": 17565, - "imminent": 17566, - "suffers": 17567, - "assumptions": 17568, - "waltz": 17569, - "##itate": 17570, - "accomplishments": 17571, - "##ited": 17572, - "bathing": 17573, - "remixed": 17574, - "deception": 17575, - "prefix": 17576, - "##emia": 17577, - "deepest": 17578, - "##tier": 17579, - "##eis": 17580, - "balkan": 17581, - "frogs": 17582, - "##rong": 17583, - "slab": 17584, - "##pate": 17585, - "philosophers": 17586, - "peterborough": 17587, - "grains": 17588, - "imports": 17589, - "dickinson": 17590, - "rwanda": 17591, - "##atics": 17592, - "1774": 17593, - "dirk": 17594, - "lan": 17595, - "tablets": 17596, - "##rove": 17597, - "clone": 17598, - "##rice": 17599, - "caretaker": 17600, - "hostilities": 17601, - "mclean": 17602, - "##gre": 17603, - "regimental": 17604, - "treasures": 17605, - "norms": 17606, - "impose": 17607, - "tsar": 17608, - "tango": 17609, - "diplomacy": 17610, - "variously": 17611, - "complain": 17612, - "192": 17613, - 
"recognise": 17614, - "arrests": 17615, - "1779": 17616, - "celestial": 17617, - "pulitzer": 17618, - "##dus": 17619, - "bing": 17620, - "libretto": 17621, - "##moor": 17622, - "adele": 17623, - "splash": 17624, - "##rite": 17625, - "expectation": 17626, - "lds": 17627, - "confronts": 17628, - "##izer": 17629, - "spontaneous": 17630, - "harmful": 17631, - "wedge": 17632, - "entrepreneurs": 17633, - "buyer": 17634, - "##ope": 17635, - "bilingual": 17636, - "translate": 17637, - "rugged": 17638, - "conner": 17639, - "circulated": 17640, - "uae": 17641, - "eaton": 17642, - "##gra": 17643, - "##zzle": 17644, - "lingered": 17645, - "lockheed": 17646, - "vishnu": 17647, - "reelection": 17648, - "alonso": 17649, - "##oom": 17650, - "joints": 17651, - "yankee": 17652, - "headline": 17653, - "cooperate": 17654, - "heinz": 17655, - "laureate": 17656, - "invading": 17657, - "##sford": 17658, - "echoes": 17659, - "scandinavian": 17660, - "##dham": 17661, - "hugging": 17662, - "vitamin": 17663, - "salute": 17664, - "micah": 17665, - "hind": 17666, - "trader": 17667, - "##sper": 17668, - "radioactive": 17669, - "##ndra": 17670, - "militants": 17671, - "poisoned": 17672, - "ratified": 17673, - "remark": 17674, - "campeonato": 17675, - "deprived": 17676, - "wander": 17677, - "prop": 17678, - "##dong": 17679, - "outlook": 17680, - "##tani": 17681, - "##rix": 17682, - "##eye": 17683, - "chiang": 17684, - "darcy": 17685, - "##oping": 17686, - "mandolin": 17687, - "spice": 17688, - "statesman": 17689, - "babylon": 17690, - "182": 17691, - "walled": 17692, - "forgetting": 17693, - "afro": 17694, - "##cap": 17695, - "158": 17696, - "giorgio": 17697, - "buffer": 17698, - "##polis": 17699, - "planetary": 17700, - "##gis": 17701, - "overlap": 17702, - "terminals": 17703, - "kinda": 17704, - "centenary": 17705, - "##bir": 17706, - "arising": 17707, - "manipulate": 17708, - "elm": 17709, - "ke": 17710, - "1770": 17711, - "ak": 17712, - "##tad": 17713, - "chrysler": 17714, - "mapped": 17715, - "moose": 17716, - "pomeranian": 17717, - "quad": 17718, - "macarthur": 17719, - "assemblies": 17720, - "shoreline": 17721, - "recalls": 17722, - "stratford": 17723, - "##rted": 17724, - "noticeable": 17725, - "##evic": 17726, - "imp": 17727, - "##rita": 17728, - "##sque": 17729, - "accustomed": 17730, - "supplying": 17731, - "tents": 17732, - "disgusted": 17733, - "vogue": 17734, - "sipped": 17735, - "filters": 17736, - "khz": 17737, - "reno": 17738, - "selecting": 17739, - "luftwaffe": 17740, - "mcmahon": 17741, - "tyne": 17742, - "masterpiece": 17743, - "carriages": 17744, - "collided": 17745, - "dunes": 17746, - "exercised": 17747, - "flare": 17748, - "remembers": 17749, - "muzzle": 17750, - "##mobile": 17751, - "heck": 17752, - "##rson": 17753, - "burgess": 17754, - "lunged": 17755, - "middleton": 17756, - "boycott": 17757, - "bilateral": 17758, - "##sity": 17759, - "hazardous": 17760, - "lumpur": 17761, - "multiplayer": 17762, - "spotlight": 17763, - "jackets": 17764, - "goldman": 17765, - "liege": 17766, - "porcelain": 17767, - "rag": 17768, - "waterford": 17769, - "benz": 17770, - "attracts": 17771, - "hopeful": 17772, - "battling": 17773, - "ottomans": 17774, - "kensington": 17775, - "baked": 17776, - "hymns": 17777, - "cheyenne": 17778, - "lattice": 17779, - "levine": 17780, - "borrow": 17781, - "polymer": 17782, - "clashes": 17783, - "michaels": 17784, - "monitored": 17785, - "commitments": 17786, - "denounced": 17787, - "##25": 17788, - "##von": 17789, - "cavity": 17790, - "##oney": 17791, - "hobby": 17792, - "akin": 
17793, - "##holders": 17794, - "futures": 17795, - "intricate": 17796, - "cornish": 17797, - "patty": 17798, - "##oned": 17799, - "illegally": 17800, - "dolphin": 17801, - "##lag": 17802, - "barlow": 17803, - "yellowish": 17804, - "maddie": 17805, - "apologized": 17806, - "luton": 17807, - "plagued": 17808, - "##puram": 17809, - "nana": 17810, - "##rds": 17811, - "sway": 17812, - "fanny": 17813, - "łodz": 17814, - "##rino": 17815, - "psi": 17816, - "suspicions": 17817, - "hanged": 17818, - "##eding": 17819, - "initiate": 17820, - "charlton": 17821, - "##por": 17822, - "nak": 17823, - "competent": 17824, - "235": 17825, - "analytical": 17826, - "annex": 17827, - "wardrobe": 17828, - "reservations": 17829, - "##rma": 17830, - "sect": 17831, - "162": 17832, - "fairfax": 17833, - "hedge": 17834, - "piled": 17835, - "buckingham": 17836, - "uneven": 17837, - "bauer": 17838, - "simplicity": 17839, - "snyder": 17840, - "interpret": 17841, - "accountability": 17842, - "donors": 17843, - "moderately": 17844, - "byrd": 17845, - "continents": 17846, - "##cite": 17847, - "##max": 17848, - "disciple": 17849, - "hr": 17850, - "jamaican": 17851, - "ping": 17852, - "nominees": 17853, - "##uss": 17854, - "mongolian": 17855, - "diver": 17856, - "attackers": 17857, - "eagerly": 17858, - "ideological": 17859, - "pillows": 17860, - "miracles": 17861, - "apartheid": 17862, - "revolver": 17863, - "sulfur": 17864, - "clinics": 17865, - "moran": 17866, - "163": 17867, - "##enko": 17868, - "ile": 17869, - "katy": 17870, - "rhetoric": 17871, - "##icated": 17872, - "chronology": 17873, - "recycling": 17874, - "##hrer": 17875, - "elongated": 17876, - "mughal": 17877, - "pascal": 17878, - "profiles": 17879, - "vibration": 17880, - "databases": 17881, - "domination": 17882, - "##fare": 17883, - "##rant": 17884, - "matthias": 17885, - "digest": 17886, - "rehearsal": 17887, - "polling": 17888, - "weiss": 17889, - "initiation": 17890, - "reeves": 17891, - "clinging": 17892, - "flourished": 17893, - "impress": 17894, - "ngo": 17895, - "##hoff": 17896, - "##ume": 17897, - "buckley": 17898, - "symposium": 17899, - "rhythms": 17900, - "weed": 17901, - "emphasize": 17902, - "transforming": 17903, - "##taking": 17904, - "##gence": 17905, - "##yman": 17906, - "accountant": 17907, - "analyze": 17908, - "flicker": 17909, - "foil": 17910, - "priesthood": 17911, - "voluntarily": 17912, - "decreases": 17913, - "##80": 17914, - "##hya": 17915, - "slater": 17916, - "sv": 17917, - "charting": 17918, - "mcgill": 17919, - "##lde": 17920, - "moreno": 17921, - "##iu": 17922, - "besieged": 17923, - "zur": 17924, - "robes": 17925, - "##phic": 17926, - "admitting": 17927, - "api": 17928, - "deported": 17929, - "turmoil": 17930, - "peyton": 17931, - "earthquakes": 17932, - "##ares": 17933, - "nationalists": 17934, - "beau": 17935, - "clair": 17936, - "brethren": 17937, - "interrupt": 17938, - "welch": 17939, - "curated": 17940, - "galerie": 17941, - "requesting": 17942, - "164": 17943, - "##ested": 17944, - "impending": 17945, - "steward": 17946, - "viper": 17947, - "##vina": 17948, - "complaining": 17949, - "beautifully": 17950, - "brandy": 17951, - "foam": 17952, - "nl": 17953, - "1660": 17954, - "##cake": 17955, - "alessandro": 17956, - "punches": 17957, - "laced": 17958, - "explanations": 17959, - "##lim": 17960, - "attribute": 17961, - "clit": 17962, - "reggie": 17963, - "discomfort": 17964, - "##cards": 17965, - "smoothed": 17966, - "whales": 17967, - "##cene": 17968, - "adler": 17969, - "countered": 17970, - "duffy": 17971, - 
"disciplinary": 17972, - "widening": 17973, - "recipe": 17974, - "reliance": 17975, - "conducts": 17976, - "goats": 17977, - "gradient": 17978, - "preaching": 17979, - "##shaw": 17980, - "matilda": 17981, - "quasi": 17982, - "striped": 17983, - "meridian": 17984, - "cannabis": 17985, - "cordoba": 17986, - "certificates": 17987, - "##agh": 17988, - "##tering": 17989, - "graffiti": 17990, - "hangs": 17991, - "pilgrims": 17992, - "repeats": 17993, - "##ych": 17994, - "revive": 17995, - "urine": 17996, - "etat": 17997, - "##hawk": 17998, - "fueled": 17999, - "belts": 18000, - "fuzzy": 18001, - "susceptible": 18002, - "##hang": 18003, - "mauritius": 18004, - "salle": 18005, - "sincere": 18006, - "beers": 18007, - "hooks": 18008, - "##cki": 18009, - "arbitration": 18010, - "entrusted": 18011, - "advise": 18012, - "sniffed": 18013, - "seminar": 18014, - "junk": 18015, - "donnell": 18016, - "processors": 18017, - "principality": 18018, - "strapped": 18019, - "celia": 18020, - "mendoza": 18021, - "everton": 18022, - "fortunes": 18023, - "prejudice": 18024, - "starving": 18025, - "reassigned": 18026, - "steamer": 18027, - "##lund": 18028, - "tuck": 18029, - "evenly": 18030, - "foreman": 18031, - "##ffen": 18032, - "dans": 18033, - "375": 18034, - "envisioned": 18035, - "slit": 18036, - "##xy": 18037, - "baseman": 18038, - "liberia": 18039, - "rosemary": 18040, - "##weed": 18041, - "electrified": 18042, - "periodically": 18043, - "potassium": 18044, - "stride": 18045, - "contexts": 18046, - "sperm": 18047, - "slade": 18048, - "mariners": 18049, - "influx": 18050, - "bianca": 18051, - "subcommittee": 18052, - "##rane": 18053, - "spilling": 18054, - "icao": 18055, - "estuary": 18056, - "##nock": 18057, - "delivers": 18058, - "iphone": 18059, - "##ulata": 18060, - "isa": 18061, - "mira": 18062, - "bohemian": 18063, - "dessert": 18064, - "##sbury": 18065, - "welcoming": 18066, - "proudly": 18067, - "slowing": 18068, - "##chs": 18069, - "musee": 18070, - "ascension": 18071, - "russ": 18072, - "##vian": 18073, - "waits": 18074, - "##psy": 18075, - "africans": 18076, - "exploit": 18077, - "##morphic": 18078, - "gov": 18079, - "eccentric": 18080, - "crab": 18081, - "peck": 18082, - "##ull": 18083, - "entrances": 18084, - "formidable": 18085, - "marketplace": 18086, - "groom": 18087, - "bolted": 18088, - "metabolism": 18089, - "patton": 18090, - "robbins": 18091, - "courier": 18092, - "payload": 18093, - "endure": 18094, - "##ifier": 18095, - "andes": 18096, - "refrigerator": 18097, - "##pr": 18098, - "ornate": 18099, - "##uca": 18100, - "ruthless": 18101, - "illegitimate": 18102, - "masonry": 18103, - "strasbourg": 18104, - "bikes": 18105, - "adobe": 18106, - "##³": 18107, - "apples": 18108, - "quintet": 18109, - "willingly": 18110, - "niche": 18111, - "bakery": 18112, - "corpses": 18113, - "energetic": 18114, - "##cliffe": 18115, - "##sser": 18116, - "##ards": 18117, - "177": 18118, - "centimeters": 18119, - "centro": 18120, - "fuscous": 18121, - "cretaceous": 18122, - "rancho": 18123, - "##yde": 18124, - "andrei": 18125, - "telecom": 18126, - "tottenham": 18127, - "oasis": 18128, - "ordination": 18129, - "vulnerability": 18130, - "presiding": 18131, - "corey": 18132, - "cp": 18133, - "penguins": 18134, - "sims": 18135, - "##pis": 18136, - "malawi": 18137, - "piss": 18138, - "##48": 18139, - "correction": 18140, - "##cked": 18141, - "##ffle": 18142, - "##ryn": 18143, - "countdown": 18144, - "detectives": 18145, - "psychiatrist": 18146, - "psychedelic": 18147, - "dinosaurs": 18148, - "blouse": 18149, - 
"##get": 18150, - "choi": 18151, - "vowed": 18152, - "##oz": 18153, - "randomly": 18154, - "##pol": 18155, - "49ers": 18156, - "scrub": 18157, - "blanche": 18158, - "bruins": 18159, - "dusseldorf": 18160, - "##using": 18161, - "unwanted": 18162, - "##ums": 18163, - "212": 18164, - "dominique": 18165, - "elevations": 18166, - "headlights": 18167, - "om": 18168, - "laguna": 18169, - "##oga": 18170, - "1750": 18171, - "famously": 18172, - "ignorance": 18173, - "shrewsbury": 18174, - "##aine": 18175, - "ajax": 18176, - "breuning": 18177, - "che": 18178, - "confederacy": 18179, - "greco": 18180, - "overhaul": 18181, - "##screen": 18182, - "paz": 18183, - "skirts": 18184, - "disagreement": 18185, - "cruelty": 18186, - "jagged": 18187, - "phoebe": 18188, - "shifter": 18189, - "hovered": 18190, - "viruses": 18191, - "##wes": 18192, - "mandy": 18193, - "##lined": 18194, - "##gc": 18195, - "landlord": 18196, - "squirrel": 18197, - "dashed": 18198, - "##ι": 18199, - "ornamental": 18200, - "gag": 18201, - "wally": 18202, - "grange": 18203, - "literal": 18204, - "spurs": 18205, - "undisclosed": 18206, - "proceeding": 18207, - "yin": 18208, - "##text": 18209, - "billie": 18210, - "orphan": 18211, - "spanned": 18212, - "humidity": 18213, - "indy": 18214, - "weighted": 18215, - "presentations": 18216, - "explosions": 18217, - "lucian": 18218, - "##tary": 18219, - "vaughn": 18220, - "hindus": 18221, - "##anga": 18222, - "##hell": 18223, - "psycho": 18224, - "171": 18225, - "daytona": 18226, - "protects": 18227, - "efficiently": 18228, - "rematch": 18229, - "sly": 18230, - "tandem": 18231, - "##oya": 18232, - "rebranded": 18233, - "impaired": 18234, - "hee": 18235, - "metropolis": 18236, - "peach": 18237, - "godfrey": 18238, - "diaspora": 18239, - "ethnicity": 18240, - "prosperous": 18241, - "gleaming": 18242, - "dar": 18243, - "grossing": 18244, - "playback": 18245, - "##rden": 18246, - "stripe": 18247, - "pistols": 18248, - "##tain": 18249, - "births": 18250, - "labelled": 18251, - "##cating": 18252, - "172": 18253, - "rudy": 18254, - "alba": 18255, - "##onne": 18256, - "aquarium": 18257, - "hostility": 18258, - "##gb": 18259, - "##tase": 18260, - "shudder": 18261, - "sumatra": 18262, - "hardest": 18263, - "lakers": 18264, - "consonant": 18265, - "creeping": 18266, - "demos": 18267, - "homicide": 18268, - "capsule": 18269, - "zeke": 18270, - "liberties": 18271, - "expulsion": 18272, - "pueblo": 18273, - "##comb": 18274, - "trait": 18275, - "transporting": 18276, - "##ddin": 18277, - "##neck": 18278, - "##yna": 18279, - "depart": 18280, - "gregg": 18281, - "mold": 18282, - "ledge": 18283, - "hangar": 18284, - "oldham": 18285, - "playboy": 18286, - "termination": 18287, - "analysts": 18288, - "gmbh": 18289, - "romero": 18290, - "##itic": 18291, - "insist": 18292, - "cradle": 18293, - "filthy": 18294, - "brightness": 18295, - "slash": 18296, - "shootout": 18297, - "deposed": 18298, - "bordering": 18299, - "##truct": 18300, - "isis": 18301, - "microwave": 18302, - "tumbled": 18303, - "sheltered": 18304, - "cathy": 18305, - "werewolves": 18306, - "messy": 18307, - "andersen": 18308, - "convex": 18309, - "clapped": 18310, - "clinched": 18311, - "satire": 18312, - "wasting": 18313, - "edo": 18314, - "vc": 18315, - "rufus": 18316, - "##jak": 18317, - "mont": 18318, - "##etti": 18319, - "poznan": 18320, - "##keeping": 18321, - "restructuring": 18322, - "transverse": 18323, - "##rland": 18324, - "azerbaijani": 18325, - "slovene": 18326, - "gestures": 18327, - "roommate": 18328, - "choking": 18329, - "shear": 18330, 
- "##quist": 18331, - "vanguard": 18332, - "oblivious": 18333, - "##hiro": 18334, - "disagreed": 18335, - "baptism": 18336, - "##lich": 18337, - "coliseum": 18338, - "##aceae": 18339, - "salvage": 18340, - "societe": 18341, - "cory": 18342, - "locke": 18343, - "relocation": 18344, - "relying": 18345, - "versailles": 18346, - "ahl": 18347, - "swelling": 18348, - "##elo": 18349, - "cheerful": 18350, - "##word": 18351, - "##edes": 18352, - "gin": 18353, - "sarajevo": 18354, - "obstacle": 18355, - "diverted": 18356, - "##nac": 18357, - "messed": 18358, - "thoroughbred": 18359, - "fluttered": 18360, - "utrecht": 18361, - "chewed": 18362, - "acquaintance": 18363, - "assassins": 18364, - "dispatch": 18365, - "mirza": 18366, - "##wart": 18367, - "nike": 18368, - "salzburg": 18369, - "swell": 18370, - "yen": 18371, - "##gee": 18372, - "idle": 18373, - "ligue": 18374, - "samson": 18375, - "##nds": 18376, - "##igh": 18377, - "playful": 18378, - "spawned": 18379, - "##cise": 18380, - "tease": 18381, - "##case": 18382, - "burgundy": 18383, - "##bot": 18384, - "stirring": 18385, - "skeptical": 18386, - "interceptions": 18387, - "marathi": 18388, - "##dies": 18389, - "bedrooms": 18390, - "aroused": 18391, - "pinch": 18392, - "##lik": 18393, - "preferences": 18394, - "tattoos": 18395, - "buster": 18396, - "digitally": 18397, - "projecting": 18398, - "rust": 18399, - "##ital": 18400, - "kitten": 18401, - "priorities": 18402, - "addison": 18403, - "pseudo": 18404, - "##guard": 18405, - "dusk": 18406, - "icons": 18407, - "sermon": 18408, - "##psis": 18409, - "##iba": 18410, - "bt": 18411, - "##lift": 18412, - "##xt": 18413, - "ju": 18414, - "truce": 18415, - "rink": 18416, - "##dah": 18417, - "##wy": 18418, - "defects": 18419, - "psychiatry": 18420, - "offences": 18421, - "calculate": 18422, - "glucose": 18423, - "##iful": 18424, - "##rized": 18425, - "##unda": 18426, - "francaise": 18427, - "##hari": 18428, - "richest": 18429, - "warwickshire": 18430, - "carly": 18431, - "1763": 18432, - "purity": 18433, - "redemption": 18434, - "lending": 18435, - "##cious": 18436, - "muse": 18437, - "bruises": 18438, - "cerebral": 18439, - "aero": 18440, - "carving": 18441, - "##name": 18442, - "preface": 18443, - "terminology": 18444, - "invade": 18445, - "monty": 18446, - "##int": 18447, - "anarchist": 18448, - "blurred": 18449, - "##iled": 18450, - "rossi": 18451, - "treats": 18452, - "guts": 18453, - "shu": 18454, - "foothills": 18455, - "ballads": 18456, - "undertaking": 18457, - "premise": 18458, - "cecilia": 18459, - "affiliates": 18460, - "blasted": 18461, - "conditional": 18462, - "wilder": 18463, - "minors": 18464, - "drone": 18465, - "rudolph": 18466, - "buffy": 18467, - "swallowing": 18468, - "horton": 18469, - "attested": 18470, - "##hop": 18471, - "rutherford": 18472, - "howell": 18473, - "primetime": 18474, - "livery": 18475, - "penal": 18476, - "##bis": 18477, - "minimize": 18478, - "hydro": 18479, - "wrecked": 18480, - "wrought": 18481, - "palazzo": 18482, - "##gling": 18483, - "cans": 18484, - "vernacular": 18485, - "friedman": 18486, - "nobleman": 18487, - "shale": 18488, - "walnut": 18489, - "danielle": 18490, - "##ection": 18491, - "##tley": 18492, - "sears": 18493, - "##kumar": 18494, - "chords": 18495, - "lend": 18496, - "flipping": 18497, - "streamed": 18498, - "por": 18499, - "dracula": 18500, - "gallons": 18501, - "sacrifices": 18502, - "gamble": 18503, - "orphanage": 18504, - "##iman": 18505, - "mckenzie": 18506, - "##gible": 18507, - "boxers": 18508, - "daly": 18509, - "##balls": 18510, - 
"##ان": 18511, - "208": 18512, - "##ific": 18513, - "##rative": 18514, - "##iq": 18515, - "exploited": 18516, - "slated": 18517, - "##uity": 18518, - "circling": 18519, - "hillary": 18520, - "pinched": 18521, - "goldberg": 18522, - "provost": 18523, - "campaigning": 18524, - "lim": 18525, - "piles": 18526, - "ironically": 18527, - "jong": 18528, - "mohan": 18529, - "successors": 18530, - "usaf": 18531, - "##tem": 18532, - "##ught": 18533, - "autobiographical": 18534, - "haute": 18535, - "preserves": 18536, - "##ending": 18537, - "acquitted": 18538, - "comparisons": 18539, - "203": 18540, - "hydroelectric": 18541, - "gangs": 18542, - "cypriot": 18543, - "torpedoes": 18544, - "rushes": 18545, - "chrome": 18546, - "derive": 18547, - "bumps": 18548, - "instability": 18549, - "fiat": 18550, - "pets": 18551, - "##mbe": 18552, - "silas": 18553, - "dye": 18554, - "reckless": 18555, - "settler": 18556, - "##itation": 18557, - "info": 18558, - "heats": 18559, - "##writing": 18560, - "176": 18561, - "canonical": 18562, - "maltese": 18563, - "fins": 18564, - "mushroom": 18565, - "stacy": 18566, - "aspen": 18567, - "avid": 18568, - "##kur": 18569, - "##loading": 18570, - "vickers": 18571, - "gaston": 18572, - "hillside": 18573, - "statutes": 18574, - "wilde": 18575, - "gail": 18576, - "kung": 18577, - "sabine": 18578, - "comfortably": 18579, - "motorcycles": 18580, - "##rgo": 18581, - "169": 18582, - "pneumonia": 18583, - "fetch": 18584, - "##sonic": 18585, - "axel": 18586, - "faintly": 18587, - "parallels": 18588, - "##oop": 18589, - "mclaren": 18590, - "spouse": 18591, - "compton": 18592, - "interdisciplinary": 18593, - "miner": 18594, - "##eni": 18595, - "181": 18596, - "clamped": 18597, - "##chal": 18598, - "##llah": 18599, - "separates": 18600, - "versa": 18601, - "##mler": 18602, - "scarborough": 18603, - "labrador": 18604, - "##lity": 18605, - "##osing": 18606, - "rutgers": 18607, - "hurdles": 18608, - "como": 18609, - "166": 18610, - "burt": 18611, - "divers": 18612, - "##100": 18613, - "wichita": 18614, - "cade": 18615, - "coincided": 18616, - "##erson": 18617, - "bruised": 18618, - "mla": 18619, - "##pper": 18620, - "vineyard": 18621, - "##ili": 18622, - "##brush": 18623, - "notch": 18624, - "mentioning": 18625, - "jase": 18626, - "hearted": 18627, - "kits": 18628, - "doe": 18629, - "##acle": 18630, - "pomerania": 18631, - "##ady": 18632, - "ronan": 18633, - "seizure": 18634, - "pavel": 18635, - "problematic": 18636, - "##zaki": 18637, - "domenico": 18638, - "##ulin": 18639, - "catering": 18640, - "penelope": 18641, - "dependence": 18642, - "parental": 18643, - "emilio": 18644, - "ministerial": 18645, - "atkinson": 18646, - "##bolic": 18647, - "clarkson": 18648, - "chargers": 18649, - "colby": 18650, - "grill": 18651, - "peeked": 18652, - "arises": 18653, - "summon": 18654, - "##aged": 18655, - "fools": 18656, - "##grapher": 18657, - "faculties": 18658, - "qaeda": 18659, - "##vial": 18660, - "garner": 18661, - "refurbished": 18662, - "##hwa": 18663, - "geelong": 18664, - "disasters": 18665, - "nudged": 18666, - "bs": 18667, - "shareholder": 18668, - "lori": 18669, - "algae": 18670, - "reinstated": 18671, - "rot": 18672, - "##ades": 18673, - "##nous": 18674, - "invites": 18675, - "stainless": 18676, - "183": 18677, - "inclusive": 18678, - "##itude": 18679, - "diocesan": 18680, - "til": 18681, - "##icz": 18682, - "denomination": 18683, - "##xa": 18684, - "benton": 18685, - "floral": 18686, - "registers": 18687, - "##ider": 18688, - "##erman": 18689, - "##kell": 18690, - "absurd": 18691, - 
"brunei": 18692, - "guangzhou": 18693, - "hitter": 18694, - "retaliation": 18695, - "##uled": 18696, - "##eve": 18697, - "blanc": 18698, - "nh": 18699, - "consistency": 18700, - "contamination": 18701, - "##eres": 18702, - "##rner": 18703, - "dire": 18704, - "palermo": 18705, - "broadcasters": 18706, - "diaries": 18707, - "inspire": 18708, - "vols": 18709, - "brewer": 18710, - "tightening": 18711, - "ky": 18712, - "mixtape": 18713, - "hormone": 18714, - "##tok": 18715, - "stokes": 18716, - "##color": 18717, - "##dly": 18718, - "##ssi": 18719, - "pg": 18720, - "##ometer": 18721, - "##lington": 18722, - "sanitation": 18723, - "##tility": 18724, - "intercontinental": 18725, - "apps": 18726, - "##adt": 18727, - "¹⁄₂": 18728, - "cylinders": 18729, - "economies": 18730, - "favourable": 18731, - "unison": 18732, - "croix": 18733, - "gertrude": 18734, - "odyssey": 18735, - "vanity": 18736, - "dangling": 18737, - "##logists": 18738, - "upgrades": 18739, - "dice": 18740, - "middleweight": 18741, - "practitioner": 18742, - "##ight": 18743, - "206": 18744, - "henrik": 18745, - "parlor": 18746, - "orion": 18747, - "angered": 18748, - "lac": 18749, - "python": 18750, - "blurted": 18751, - "##rri": 18752, - "sensual": 18753, - "intends": 18754, - "swings": 18755, - "angled": 18756, - "##phs": 18757, - "husky": 18758, - "attain": 18759, - "peerage": 18760, - "precinct": 18761, - "textiles": 18762, - "cheltenham": 18763, - "shuffled": 18764, - "dai": 18765, - "confess": 18766, - "tasting": 18767, - "bhutan": 18768, - "##riation": 18769, - "tyrone": 18770, - "segregation": 18771, - "abrupt": 18772, - "ruiz": 18773, - "##rish": 18774, - "smirked": 18775, - "blackwell": 18776, - "confidential": 18777, - "browning": 18778, - "amounted": 18779, - "##put": 18780, - "vase": 18781, - "scarce": 18782, - "fabulous": 18783, - "raided": 18784, - "staple": 18785, - "guyana": 18786, - "unemployed": 18787, - "glider": 18788, - "shay": 18789, - "##tow": 18790, - "carmine": 18791, - "troll": 18792, - "intervene": 18793, - "squash": 18794, - "superstar": 18795, - "##uce": 18796, - "cylindrical": 18797, - "len": 18798, - "roadway": 18799, - "researched": 18800, - "handy": 18801, - "##rium": 18802, - "##jana": 18803, - "meta": 18804, - "lao": 18805, - "declares": 18806, - "##rring": 18807, - "##tadt": 18808, - "##elin": 18809, - "##kova": 18810, - "willem": 18811, - "shrubs": 18812, - "napoleonic": 18813, - "realms": 18814, - "skater": 18815, - "qi": 18816, - "volkswagen": 18817, - "##ł": 18818, - "tad": 18819, - "hara": 18820, - "archaeologist": 18821, - "awkwardly": 18822, - "eerie": 18823, - "##kind": 18824, - "wiley": 18825, - "##heimer": 18826, - "##24": 18827, - "titus": 18828, - "organizers": 18829, - "cfl": 18830, - "crusaders": 18831, - "lama": 18832, - "usb": 18833, - "vent": 18834, - "enraged": 18835, - "thankful": 18836, - "occupants": 18837, - "maximilian": 18838, - "##gaard": 18839, - "possessing": 18840, - "textbooks": 18841, - "##oran": 18842, - "collaborator": 18843, - "quaker": 18844, - "##ulo": 18845, - "avalanche": 18846, - "mono": 18847, - "silky": 18848, - "straits": 18849, - "isaiah": 18850, - "mustang": 18851, - "surged": 18852, - "resolutions": 18853, - "potomac": 18854, - "descend": 18855, - "cl": 18856, - "kilograms": 18857, - "plato": 18858, - "strains": 18859, - "saturdays": 18860, - "##olin": 18861, - "bernstein": 18862, - "##ype": 18863, - "holstein": 18864, - "ponytail": 18865, - "##watch": 18866, - "belize": 18867, - "conversely": 18868, - "heroine": 18869, - "perpetual": 18870, - "##ylus": 
18871, - "charcoal": 18872, - "piedmont": 18873, - "glee": 18874, - "negotiating": 18875, - "backdrop": 18876, - "prologue": 18877, - "##jah": 18878, - "##mmy": 18879, - "pasadena": 18880, - "climbs": 18881, - "ramos": 18882, - "sunni": 18883, - "##holm": 18884, - "##tner": 18885, - "##tri": 18886, - "anand": 18887, - "deficiency": 18888, - "hertfordshire": 18889, - "stout": 18890, - "##avi": 18891, - "aperture": 18892, - "orioles": 18893, - "##irs": 18894, - "doncaster": 18895, - "intrigued": 18896, - "bombed": 18897, - "coating": 18898, - "otis": 18899, - "##mat": 18900, - "cocktail": 18901, - "##jit": 18902, - "##eto": 18903, - "amir": 18904, - "arousal": 18905, - "sar": 18906, - "##proof": 18907, - "##act": 18908, - "##ories": 18909, - "dixie": 18910, - "pots": 18911, - "##bow": 18912, - "whereabouts": 18913, - "159": 18914, - "##fted": 18915, - "drains": 18916, - "bullying": 18917, - "cottages": 18918, - "scripture": 18919, - "coherent": 18920, - "fore": 18921, - "poe": 18922, - "appetite": 18923, - "##uration": 18924, - "sampled": 18925, - "##ators": 18926, - "##dp": 18927, - "derrick": 18928, - "rotor": 18929, - "jays": 18930, - "peacock": 18931, - "installment": 18932, - "##rro": 18933, - "advisors": 18934, - "##coming": 18935, - "rodeo": 18936, - "scotch": 18937, - "##mot": 18938, - "##db": 18939, - "##fen": 18940, - "##vant": 18941, - "ensued": 18942, - "rodrigo": 18943, - "dictatorship": 18944, - "martyrs": 18945, - "twenties": 18946, - "##н": 18947, - "towed": 18948, - "incidence": 18949, - "marta": 18950, - "rainforest": 18951, - "sai": 18952, - "scaled": 18953, - "##cles": 18954, - "oceanic": 18955, - "qualifiers": 18956, - "symphonic": 18957, - "mcbride": 18958, - "dislike": 18959, - "generalized": 18960, - "aubrey": 18961, - "colonization": 18962, - "##iation": 18963, - "##lion": 18964, - "##ssing": 18965, - "disliked": 18966, - "lublin": 18967, - "salesman": 18968, - "##ulates": 18969, - "spherical": 18970, - "whatsoever": 18971, - "sweating": 18972, - "avalon": 18973, - "contention": 18974, - "punt": 18975, - "severity": 18976, - "alderman": 18977, - "atari": 18978, - "##dina": 18979, - "##grant": 18980, - "##rop": 18981, - "scarf": 18982, - "seville": 18983, - "vertices": 18984, - "annexation": 18985, - "fairfield": 18986, - "fascination": 18987, - "inspiring": 18988, - "launches": 18989, - "palatinate": 18990, - "regretted": 18991, - "##rca": 18992, - "feral": 18993, - "##iom": 18994, - "elk": 18995, - "nap": 18996, - "olsen": 18997, - "reddy": 18998, - "yong": 18999, - "##leader": 19000, - "##iae": 19001, - "garment": 19002, - "transports": 19003, - "feng": 19004, - "gracie": 19005, - "outrage": 19006, - "viceroy": 19007, - "insides": 19008, - "##esis": 19009, - "breakup": 19010, - "grady": 19011, - "organizer": 19012, - "softer": 19013, - "grimaced": 19014, - "222": 19015, - "murals": 19016, - "galicia": 19017, - "arranging": 19018, - "vectors": 19019, - "##rsten": 19020, - "bas": 19021, - "##sb": 19022, - "##cens": 19023, - "sloan": 19024, - "##eka": 19025, - "bitten": 19026, - "ara": 19027, - "fender": 19028, - "nausea": 19029, - "bumped": 19030, - "kris": 19031, - "banquet": 19032, - "comrades": 19033, - "detector": 19034, - "persisted": 19035, - "##llan": 19036, - "adjustment": 19037, - "endowed": 19038, - "cinemas": 19039, - "##shot": 19040, - "sellers": 19041, - "##uman": 19042, - "peek": 19043, - "epa": 19044, - "kindly": 19045, - "neglect": 19046, - "simpsons": 19047, - "talon": 19048, - "mausoleum": 19049, - "runaway": 19050, - "hangul": 19051, - "lookout": 
19052, - "##cic": 19053, - "rewards": 19054, - "coughed": 19055, - "acquainted": 19056, - "chloride": 19057, - "##ald": 19058, - "quicker": 19059, - "accordion": 19060, - "neolithic": 19061, - "##qa": 19062, - "artemis": 19063, - "coefficient": 19064, - "lenny": 19065, - "pandora": 19066, - "tx": 19067, - "##xed": 19068, - "ecstasy": 19069, - "litter": 19070, - "segunda": 19071, - "chairperson": 19072, - "gemma": 19073, - "hiss": 19074, - "rumor": 19075, - "vow": 19076, - "nasal": 19077, - "antioch": 19078, - "compensate": 19079, - "patiently": 19080, - "transformers": 19081, - "##eded": 19082, - "judo": 19083, - "morrow": 19084, - "penis": 19085, - "posthumous": 19086, - "philips": 19087, - "bandits": 19088, - "husbands": 19089, - "denote": 19090, - "flaming": 19091, - "##any": 19092, - "##phones": 19093, - "langley": 19094, - "yorker": 19095, - "1760": 19096, - "walters": 19097, - "##uo": 19098, - "##kle": 19099, - "gubernatorial": 19100, - "fatty": 19101, - "samsung": 19102, - "leroy": 19103, - "outlaw": 19104, - "##nine": 19105, - "unpublished": 19106, - "poole": 19107, - "jakob": 19108, - "##ᵢ": 19109, - "##ₙ": 19110, - "crete": 19111, - "distorted": 19112, - "superiority": 19113, - "##dhi": 19114, - "intercept": 19115, - "crust": 19116, - "mig": 19117, - "claus": 19118, - "crashes": 19119, - "positioning": 19120, - "188": 19121, - "stallion": 19122, - "301": 19123, - "frontal": 19124, - "armistice": 19125, - "##estinal": 19126, - "elton": 19127, - "aj": 19128, - "encompassing": 19129, - "camel": 19130, - "commemorated": 19131, - "malaria": 19132, - "woodward": 19133, - "calf": 19134, - "cigar": 19135, - "penetrate": 19136, - "##oso": 19137, - "willard": 19138, - "##rno": 19139, - "##uche": 19140, - "illustrate": 19141, - "amusing": 19142, - "convergence": 19143, - "noteworthy": 19144, - "##lma": 19145, - "##rva": 19146, - "journeys": 19147, - "realise": 19148, - "manfred": 19149, - "##sable": 19150, - "410": 19151, - "##vocation": 19152, - "hearings": 19153, - "fiance": 19154, - "##posed": 19155, - "educators": 19156, - "provoked": 19157, - "adjusting": 19158, - "##cturing": 19159, - "modular": 19160, - "stockton": 19161, - "paterson": 19162, - "vlad": 19163, - "rejects": 19164, - "electors": 19165, - "selena": 19166, - "maureen": 19167, - "##tres": 19168, - "uber": 19169, - "##rce": 19170, - "swirled": 19171, - "##num": 19172, - "proportions": 19173, - "nanny": 19174, - "pawn": 19175, - "naturalist": 19176, - "parma": 19177, - "apostles": 19178, - "awoke": 19179, - "ethel": 19180, - "wen": 19181, - "##bey": 19182, - "monsoon": 19183, - "overview": 19184, - "##inating": 19185, - "mccain": 19186, - "rendition": 19187, - "risky": 19188, - "adorned": 19189, - "##ih": 19190, - "equestrian": 19191, - "germain": 19192, - "nj": 19193, - "conspicuous": 19194, - "confirming": 19195, - "##yoshi": 19196, - "shivering": 19197, - "##imeter": 19198, - "milestone": 19199, - "rumours": 19200, - "flinched": 19201, - "bounds": 19202, - "smacked": 19203, - "token": 19204, - "##bei": 19205, - "lectured": 19206, - "automobiles": 19207, - "##shore": 19208, - "impacted": 19209, - "##iable": 19210, - "nouns": 19211, - "nero": 19212, - "##leaf": 19213, - "ismail": 19214, - "prostitute": 19215, - "trams": 19216, - "##lace": 19217, - "bridget": 19218, - "sud": 19219, - "stimulus": 19220, - "impressions": 19221, - "reins": 19222, - "revolves": 19223, - "##oud": 19224, - "##gned": 19225, - "giro": 19226, - "honeymoon": 19227, - "##swell": 19228, - "criterion": 19229, - "##sms": 19230, - "##uil": 19231, - 
"libyan": 19232, - "prefers": 19233, - "##osition": 19234, - "211": 19235, - "preview": 19236, - "sucks": 19237, - "accusation": 19238, - "bursts": 19239, - "metaphor": 19240, - "diffusion": 19241, - "tolerate": 19242, - "faye": 19243, - "betting": 19244, - "cinematographer": 19245, - "liturgical": 19246, - "specials": 19247, - "bitterly": 19248, - "humboldt": 19249, - "##ckle": 19250, - "flux": 19251, - "rattled": 19252, - "##itzer": 19253, - "archaeologists": 19254, - "odor": 19255, - "authorised": 19256, - "marshes": 19257, - "discretion": 19258, - "##ов": 19259, - "alarmed": 19260, - "archaic": 19261, - "inverse": 19262, - "##leton": 19263, - "explorers": 19264, - "##pine": 19265, - "drummond": 19266, - "tsunami": 19267, - "woodlands": 19268, - "##minate": 19269, - "##tland": 19270, - "booklet": 19271, - "insanity": 19272, - "owning": 19273, - "insert": 19274, - "crafted": 19275, - "calculus": 19276, - "##tore": 19277, - "receivers": 19278, - "##bt": 19279, - "stung": 19280, - "##eca": 19281, - "##nched": 19282, - "prevailing": 19283, - "travellers": 19284, - "eyeing": 19285, - "lila": 19286, - "graphs": 19287, - "##borne": 19288, - "178": 19289, - "julien": 19290, - "##won": 19291, - "morale": 19292, - "adaptive": 19293, - "therapist": 19294, - "erica": 19295, - "cw": 19296, - "libertarian": 19297, - "bowman": 19298, - "pitches": 19299, - "vita": 19300, - "##ional": 19301, - "crook": 19302, - "##ads": 19303, - "##entation": 19304, - "caledonia": 19305, - "mutiny": 19306, - "##sible": 19307, - "1840s": 19308, - "automation": 19309, - "##ß": 19310, - "flock": 19311, - "##pia": 19312, - "ironic": 19313, - "pathology": 19314, - "##imus": 19315, - "remarried": 19316, - "##22": 19317, - "joker": 19318, - "withstand": 19319, - "energies": 19320, - "##att": 19321, - "shropshire": 19322, - "hostages": 19323, - "madeleine": 19324, - "tentatively": 19325, - "conflicting": 19326, - "mateo": 19327, - "recipes": 19328, - "euros": 19329, - "ol": 19330, - "mercenaries": 19331, - "nico": 19332, - "##ndon": 19333, - "albuquerque": 19334, - "augmented": 19335, - "mythical": 19336, - "bel": 19337, - "freud": 19338, - "##child": 19339, - "cough": 19340, - "##lica": 19341, - "365": 19342, - "freddy": 19343, - "lillian": 19344, - "genetically": 19345, - "nuremberg": 19346, - "calder": 19347, - "209": 19348, - "bonn": 19349, - "outdoors": 19350, - "paste": 19351, - "suns": 19352, - "urgency": 19353, - "vin": 19354, - "restraint": 19355, - "tyson": 19356, - "##cera": 19357, - "##selle": 19358, - "barrage": 19359, - "bethlehem": 19360, - "kahn": 19361, - "##par": 19362, - "mounts": 19363, - "nippon": 19364, - "barony": 19365, - "happier": 19366, - "ryu": 19367, - "makeshift": 19368, - "sheldon": 19369, - "blushed": 19370, - "castillo": 19371, - "barking": 19372, - "listener": 19373, - "taped": 19374, - "bethel": 19375, - "fluent": 19376, - "headlines": 19377, - "pornography": 19378, - "rum": 19379, - "disclosure": 19380, - "sighing": 19381, - "mace": 19382, - "doubling": 19383, - "gunther": 19384, - "manly": 19385, - "##plex": 19386, - "rt": 19387, - "interventions": 19388, - "physiological": 19389, - "forwards": 19390, - "emerges": 19391, - "##tooth": 19392, - "##gny": 19393, - "compliment": 19394, - "rib": 19395, - "recession": 19396, - "visibly": 19397, - "barge": 19398, - "faults": 19399, - "connector": 19400, - "exquisite": 19401, - "prefect": 19402, - "##rlin": 19403, - "patio": 19404, - "##cured": 19405, - "elevators": 19406, - "brandt": 19407, - "italics": 19408, - "pena": 19409, - "173": 19410, - 
"wasp": 19411, - "satin": 19412, - "ea": 19413, - "botswana": 19414, - "graceful": 19415, - "respectable": 19416, - "##jima": 19417, - "##rter": 19418, - "##oic": 19419, - "franciscan": 19420, - "generates": 19421, - "##dl": 19422, - "alfredo": 19423, - "disgusting": 19424, - "##olate": 19425, - "##iously": 19426, - "sherwood": 19427, - "warns": 19428, - "cod": 19429, - "promo": 19430, - "cheryl": 19431, - "sino": 19432, - "##ة": 19433, - "##escu": 19434, - "twitch": 19435, - "##zhi": 19436, - "brownish": 19437, - "thom": 19438, - "ortiz": 19439, - "##dron": 19440, - "densely": 19441, - "##beat": 19442, - "carmel": 19443, - "reinforce": 19444, - "##bana": 19445, - "187": 19446, - "anastasia": 19447, - "downhill": 19448, - "vertex": 19449, - "contaminated": 19450, - "remembrance": 19451, - "harmonic": 19452, - "homework": 19453, - "##sol": 19454, - "fiancee": 19455, - "gears": 19456, - "olds": 19457, - "angelica": 19458, - "loft": 19459, - "ramsay": 19460, - "quiz": 19461, - "colliery": 19462, - "sevens": 19463, - "##cape": 19464, - "autism": 19465, - "##hil": 19466, - "walkway": 19467, - "##boats": 19468, - "ruben": 19469, - "abnormal": 19470, - "ounce": 19471, - "khmer": 19472, - "##bbe": 19473, - "zachary": 19474, - "bedside": 19475, - "morphology": 19476, - "punching": 19477, - "##olar": 19478, - "sparrow": 19479, - "convinces": 19480, - "##35": 19481, - "hewitt": 19482, - "queer": 19483, - "remastered": 19484, - "rods": 19485, - "mabel": 19486, - "solemn": 19487, - "notified": 19488, - "lyricist": 19489, - "symmetric": 19490, - "##xide": 19491, - "174": 19492, - "encore": 19493, - "passports": 19494, - "wildcats": 19495, - "##uni": 19496, - "baja": 19497, - "##pac": 19498, - "mildly": 19499, - "##ease": 19500, - "bleed": 19501, - "commodity": 19502, - "mounds": 19503, - "glossy": 19504, - "orchestras": 19505, - "##omo": 19506, - "damian": 19507, - "prelude": 19508, - "ambitions": 19509, - "##vet": 19510, - "awhile": 19511, - "remotely": 19512, - "##aud": 19513, - "asserts": 19514, - "imply": 19515, - "##iques": 19516, - "distinctly": 19517, - "modelling": 19518, - "remedy": 19519, - "##dded": 19520, - "windshield": 19521, - "dani": 19522, - "xiao": 19523, - "##endra": 19524, - "audible": 19525, - "powerplant": 19526, - "1300": 19527, - "invalid": 19528, - "elemental": 19529, - "acquisitions": 19530, - "##hala": 19531, - "immaculate": 19532, - "libby": 19533, - "plata": 19534, - "smuggling": 19535, - "ventilation": 19536, - "denoted": 19537, - "minh": 19538, - "##morphism": 19539, - "430": 19540, - "differed": 19541, - "dion": 19542, - "kelley": 19543, - "lore": 19544, - "mocking": 19545, - "sabbath": 19546, - "spikes": 19547, - "hygiene": 19548, - "drown": 19549, - "runoff": 19550, - "stylized": 19551, - "tally": 19552, - "liberated": 19553, - "aux": 19554, - "interpreter": 19555, - "righteous": 19556, - "aba": 19557, - "siren": 19558, - "reaper": 19559, - "pearce": 19560, - "millie": 19561, - "##cier": 19562, - "##yra": 19563, - "gaius": 19564, - "##iso": 19565, - "captures": 19566, - "##ttering": 19567, - "dorm": 19568, - "claudio": 19569, - "##sic": 19570, - "benches": 19571, - "knighted": 19572, - "blackness": 19573, - "##ored": 19574, - "discount": 19575, - "fumble": 19576, - "oxidation": 19577, - "routed": 19578, - "##ς": 19579, - "novak": 19580, - "perpendicular": 19581, - "spoiled": 19582, - "fracture": 19583, - "splits": 19584, - "##urt": 19585, - "pads": 19586, - "topology": 19587, - "##cats": 19588, - "axes": 19589, - "fortunate": 19590, - "offenders": 19591, - 
"protestants": 19592, - "esteem": 19593, - "221": 19594, - "broadband": 19595, - "convened": 19596, - "frankly": 19597, - "hound": 19598, - "prototypes": 19599, - "isil": 19600, - "facilitated": 19601, - "keel": 19602, - "##sher": 19603, - "sahara": 19604, - "awaited": 19605, - "bubba": 19606, - "orb": 19607, - "prosecutors": 19608, - "186": 19609, - "hem": 19610, - "520": 19611, - "##xing": 19612, - "relaxing": 19613, - "remnant": 19614, - "romney": 19615, - "sorted": 19616, - "slalom": 19617, - "stefano": 19618, - "ulrich": 19619, - "##active": 19620, - "exemption": 19621, - "folder": 19622, - "pauses": 19623, - "foliage": 19624, - "hitchcock": 19625, - "epithet": 19626, - "204": 19627, - "criticisms": 19628, - "##aca": 19629, - "ballistic": 19630, - "brody": 19631, - "hinduism": 19632, - "chaotic": 19633, - "youths": 19634, - "equals": 19635, - "##pala": 19636, - "pts": 19637, - "thicker": 19638, - "analogous": 19639, - "capitalist": 19640, - "improvised": 19641, - "overseeing": 19642, - "sinatra": 19643, - "ascended": 19644, - "beverage": 19645, - "##tl": 19646, - "straightforward": 19647, - "##kon": 19648, - "curran": 19649, - "##west": 19650, - "bois": 19651, - "325": 19652, - "induce": 19653, - "surveying": 19654, - "emperors": 19655, - "sax": 19656, - "unpopular": 19657, - "##kk": 19658, - "cartoonist": 19659, - "fused": 19660, - "##mble": 19661, - "unto": 19662, - "##yuki": 19663, - "localities": 19664, - "##cko": 19665, - "##ln": 19666, - "darlington": 19667, - "slain": 19668, - "academie": 19669, - "lobbying": 19670, - "sediment": 19671, - "puzzles": 19672, - "##grass": 19673, - "defiance": 19674, - "dickens": 19675, - "manifest": 19676, - "tongues": 19677, - "alumnus": 19678, - "arbor": 19679, - "coincide": 19680, - "184": 19681, - "appalachian": 19682, - "mustafa": 19683, - "examiner": 19684, - "cabaret": 19685, - "traumatic": 19686, - "yves": 19687, - "bracelet": 19688, - "draining": 19689, - "heroin": 19690, - "magnum": 19691, - "baths": 19692, - "odessa": 19693, - "consonants": 19694, - "mitsubishi": 19695, - "##gua": 19696, - "kellan": 19697, - "vaudeville": 19698, - "##fr": 19699, - "joked": 19700, - "null": 19701, - "straps": 19702, - "probation": 19703, - "##ław": 19704, - "ceded": 19705, - "interfaces": 19706, - "##pas": 19707, - "##zawa": 19708, - "blinding": 19709, - "viet": 19710, - "224": 19711, - "rothschild": 19712, - "museo": 19713, - "640": 19714, - "huddersfield": 19715, - "##vr": 19716, - "tactic": 19717, - "##storm": 19718, - "brackets": 19719, - "dazed": 19720, - "incorrectly": 19721, - "##vu": 19722, - "reg": 19723, - "glazed": 19724, - "fearful": 19725, - "manifold": 19726, - "benefited": 19727, - "irony": 19728, - "##sun": 19729, - "stumbling": 19730, - "##rte": 19731, - "willingness": 19732, - "balkans": 19733, - "mei": 19734, - "wraps": 19735, - "##aba": 19736, - "injected": 19737, - "##lea": 19738, - "gu": 19739, - "syed": 19740, - "harmless": 19741, - "##hammer": 19742, - "bray": 19743, - "takeoff": 19744, - "poppy": 19745, - "timor": 19746, - "cardboard": 19747, - "astronaut": 19748, - "purdue": 19749, - "weeping": 19750, - "southbound": 19751, - "cursing": 19752, - "stalls": 19753, - "diagonal": 19754, - "##neer": 19755, - "lamar": 19756, - "bryce": 19757, - "comte": 19758, - "weekdays": 19759, - "harrington": 19760, - "##uba": 19761, - "negatively": 19762, - "##see": 19763, - "lays": 19764, - "grouping": 19765, - "##cken": 19766, - "##henko": 19767, - "affirmed": 19768, - "halle": 19769, - "modernist": 19770, - "##lai": 19771, - "hodges": 19772, - 
"smelling": 19773, - "aristocratic": 19774, - "baptized": 19775, - "dismiss": 19776, - "justification": 19777, - "oilers": 19778, - "##now": 19779, - "coupling": 19780, - "qin": 19781, - "snack": 19782, - "healer": 19783, - "##qing": 19784, - "gardener": 19785, - "layla": 19786, - "battled": 19787, - "formulated": 19788, - "stephenson": 19789, - "gravitational": 19790, - "##gill": 19791, - "##jun": 19792, - "1768": 19793, - "granny": 19794, - "coordinating": 19795, - "suites": 19796, - "##cd": 19797, - "##ioned": 19798, - "monarchs": 19799, - "##cote": 19800, - "##hips": 19801, - "sep": 19802, - "blended": 19803, - "apr": 19804, - "barrister": 19805, - "deposition": 19806, - "fia": 19807, - "mina": 19808, - "policemen": 19809, - "paranoid": 19810, - "##pressed": 19811, - "churchyard": 19812, - "covert": 19813, - "crumpled": 19814, - "creep": 19815, - "abandoning": 19816, - "tr": 19817, - "transmit": 19818, - "conceal": 19819, - "barr": 19820, - "understands": 19821, - "readiness": 19822, - "spire": 19823, - "##cology": 19824, - "##enia": 19825, - "##erry": 19826, - "610": 19827, - "startling": 19828, - "unlock": 19829, - "vida": 19830, - "bowled": 19831, - "slots": 19832, - "##nat": 19833, - "##islav": 19834, - "spaced": 19835, - "trusting": 19836, - "admire": 19837, - "rig": 19838, - "##ink": 19839, - "slack": 19840, - "##70": 19841, - "mv": 19842, - "207": 19843, - "casualty": 19844, - "##wei": 19845, - "classmates": 19846, - "##odes": 19847, - "##rar": 19848, - "##rked": 19849, - "amherst": 19850, - "furnished": 19851, - "evolve": 19852, - "foundry": 19853, - "menace": 19854, - "mead": 19855, - "##lein": 19856, - "flu": 19857, - "wesleyan": 19858, - "##kled": 19859, - "monterey": 19860, - "webber": 19861, - "##vos": 19862, - "wil": 19863, - "##mith": 19864, - "##на": 19865, - "bartholomew": 19866, - "justices": 19867, - "restrained": 19868, - "##cke": 19869, - "amenities": 19870, - "191": 19871, - "mediated": 19872, - "sewage": 19873, - "trenches": 19874, - "ml": 19875, - "mainz": 19876, - "##thus": 19877, - "1800s": 19878, - "##cula": 19879, - "##inski": 19880, - "caine": 19881, - "bonding": 19882, - "213": 19883, - "converts": 19884, - "spheres": 19885, - "superseded": 19886, - "marianne": 19887, - "crypt": 19888, - "sweaty": 19889, - "ensign": 19890, - "historia": 19891, - "##br": 19892, - "spruce": 19893, - "##post": 19894, - "##ask": 19895, - "forks": 19896, - "thoughtfully": 19897, - "yukon": 19898, - "pamphlet": 19899, - "ames": 19900, - "##uter": 19901, - "karma": 19902, - "##yya": 19903, - "bryn": 19904, - "negotiation": 19905, - "sighs": 19906, - "incapable": 19907, - "##mbre": 19908, - "##ntial": 19909, - "actresses": 19910, - "taft": 19911, - "##mill": 19912, - "luce": 19913, - "prevailed": 19914, - "##amine": 19915, - "1773": 19916, - "motionless": 19917, - "envoy": 19918, - "testify": 19919, - "investing": 19920, - "sculpted": 19921, - "instructors": 19922, - "provence": 19923, - "kali": 19924, - "cullen": 19925, - "horseback": 19926, - "##while": 19927, - "goodwin": 19928, - "##jos": 19929, - "gaa": 19930, - "norte": 19931, - "##ldon": 19932, - "modify": 19933, - "wavelength": 19934, - "abd": 19935, - "214": 19936, - "skinned": 19937, - "sprinter": 19938, - "forecast": 19939, - "scheduling": 19940, - "marries": 19941, - "squared": 19942, - "tentative": 19943, - "##chman": 19944, - "boer": 19945, - "##isch": 19946, - "bolts": 19947, - "swap": 19948, - "fisherman": 19949, - "assyrian": 19950, - "impatiently": 19951, - "guthrie": 19952, - "martins": 19953, - "murdoch": 
19954, - "194": 19955, - "tanya": 19956, - "nicely": 19957, - "dolly": 19958, - "lacy": 19959, - "med": 19960, - "##45": 19961, - "syn": 19962, - "decks": 19963, - "fashionable": 19964, - "millionaire": 19965, - "##ust": 19966, - "surfing": 19967, - "##ml": 19968, - "##ision": 19969, - "heaved": 19970, - "tammy": 19971, - "consulate": 19972, - "attendees": 19973, - "routinely": 19974, - "197": 19975, - "fuse": 19976, - "saxophonist": 19977, - "backseat": 19978, - "malaya": 19979, - "##lord": 19980, - "scowl": 19981, - "tau": 19982, - "##ishly": 19983, - "193": 19984, - "sighted": 19985, - "steaming": 19986, - "##rks": 19987, - "303": 19988, - "911": 19989, - "##holes": 19990, - "##hong": 19991, - "ching": 19992, - "##wife": 19993, - "bless": 19994, - "conserved": 19995, - "jurassic": 19996, - "stacey": 19997, - "unix": 19998, - "zion": 19999, - "chunk": 20000, - "rigorous": 20001, - "blaine": 20002, - "198": 20003, - "peabody": 20004, - "slayer": 20005, - "dismay": 20006, - "brewers": 20007, - "nz": 20008, - "##jer": 20009, - "det": 20010, - "##glia": 20011, - "glover": 20012, - "postwar": 20013, - "int": 20014, - "penetration": 20015, - "sylvester": 20016, - "imitation": 20017, - "vertically": 20018, - "airlift": 20019, - "heiress": 20020, - "knoxville": 20021, - "viva": 20022, - "##uin": 20023, - "390": 20024, - "macon": 20025, - "##rim": 20026, - "##fighter": 20027, - "##gonal": 20028, - "janice": 20029, - "##orescence": 20030, - "##wari": 20031, - "marius": 20032, - "belongings": 20033, - "leicestershire": 20034, - "196": 20035, - "blanco": 20036, - "inverted": 20037, - "preseason": 20038, - "sanity": 20039, - "sobbing": 20040, - "##due": 20041, - "##elt": 20042, - "##dled": 20043, - "collingwood": 20044, - "regeneration": 20045, - "flickering": 20046, - "shortest": 20047, - "##mount": 20048, - "##osi": 20049, - "feminism": 20050, - "##lat": 20051, - "sherlock": 20052, - "cabinets": 20053, - "fumbled": 20054, - "northbound": 20055, - "precedent": 20056, - "snaps": 20057, - "##mme": 20058, - "researching": 20059, - "##akes": 20060, - "guillaume": 20061, - "insights": 20062, - "manipulated": 20063, - "vapor": 20064, - "neighbour": 20065, - "sap": 20066, - "gangster": 20067, - "frey": 20068, - "f1": 20069, - "stalking": 20070, - "scarcely": 20071, - "callie": 20072, - "barnett": 20073, - "tendencies": 20074, - "audi": 20075, - "doomed": 20076, - "assessing": 20077, - "slung": 20078, - "panchayat": 20079, - "ambiguous": 20080, - "bartlett": 20081, - "##etto": 20082, - "distributing": 20083, - "violating": 20084, - "wolverhampton": 20085, - "##hetic": 20086, - "swami": 20087, - "histoire": 20088, - "##urus": 20089, - "liable": 20090, - "pounder": 20091, - "groin": 20092, - "hussain": 20093, - "larsen": 20094, - "popping": 20095, - "surprises": 20096, - "##atter": 20097, - "vie": 20098, - "curt": 20099, - "##station": 20100, - "mute": 20101, - "relocate": 20102, - "musicals": 20103, - "authorization": 20104, - "richter": 20105, - "##sef": 20106, - "immortality": 20107, - "tna": 20108, - "bombings": 20109, - "##press": 20110, - "deteriorated": 20111, - "yiddish": 20112, - "##acious": 20113, - "robbed": 20114, - "colchester": 20115, - "cs": 20116, - "pmid": 20117, - "ao": 20118, - "verified": 20119, - "balancing": 20120, - "apostle": 20121, - "swayed": 20122, - "recognizable": 20123, - "oxfordshire": 20124, - "retention": 20125, - "nottinghamshire": 20126, - "contender": 20127, - "judd": 20128, - "invitational": 20129, - "shrimp": 20130, - "uhf": 20131, - "##icient": 20132, - "cleaner": 20133, 
- "longitudinal": 20134, - "tanker": 20135, - "##mur": 20136, - "acronym": 20137, - "broker": 20138, - "koppen": 20139, - "sundance": 20140, - "suppliers": 20141, - "##gil": 20142, - "4000": 20143, - "clipped": 20144, - "fuels": 20145, - "petite": 20146, - "##anne": 20147, - "landslide": 20148, - "helene": 20149, - "diversion": 20150, - "populous": 20151, - "landowners": 20152, - "auspices": 20153, - "melville": 20154, - "quantitative": 20155, - "##xes": 20156, - "ferries": 20157, - "nicky": 20158, - "##llus": 20159, - "doo": 20160, - "haunting": 20161, - "roche": 20162, - "carver": 20163, - "downed": 20164, - "unavailable": 20165, - "##pathy": 20166, - "approximation": 20167, - "hiroshima": 20168, - "##hue": 20169, - "garfield": 20170, - "valle": 20171, - "comparatively": 20172, - "keyboardist": 20173, - "traveler": 20174, - "##eit": 20175, - "congestion": 20176, - "calculating": 20177, - "subsidiaries": 20178, - "##bate": 20179, - "serb": 20180, - "modernization": 20181, - "fairies": 20182, - "deepened": 20183, - "ville": 20184, - "averages": 20185, - "##lore": 20186, - "inflammatory": 20187, - "tonga": 20188, - "##itch": 20189, - "co₂": 20190, - "squads": 20191, - "##hea": 20192, - "gigantic": 20193, - "serum": 20194, - "enjoyment": 20195, - "retailer": 20196, - "verona": 20197, - "35th": 20198, - "cis": 20199, - "##phobic": 20200, - "magna": 20201, - "technicians": 20202, - "##vati": 20203, - "arithmetic": 20204, - "##sport": 20205, - "levin": 20206, - "##dation": 20207, - "amtrak": 20208, - "chow": 20209, - "sienna": 20210, - "##eyer": 20211, - "backstage": 20212, - "entrepreneurship": 20213, - "##otic": 20214, - "learnt": 20215, - "tao": 20216, - "##udy": 20217, - "worcestershire": 20218, - "formulation": 20219, - "baggage": 20220, - "hesitant": 20221, - "bali": 20222, - "sabotage": 20223, - "##kari": 20224, - "barren": 20225, - "enhancing": 20226, - "murmur": 20227, - "pl": 20228, - "freshly": 20229, - "putnam": 20230, - "syntax": 20231, - "aces": 20232, - "medicines": 20233, - "resentment": 20234, - "bandwidth": 20235, - "##sier": 20236, - "grins": 20237, - "chili": 20238, - "guido": 20239, - "##sei": 20240, - "framing": 20241, - "implying": 20242, - "gareth": 20243, - "lissa": 20244, - "genevieve": 20245, - "pertaining": 20246, - "admissions": 20247, - "geo": 20248, - "thorpe": 20249, - "proliferation": 20250, - "sato": 20251, - "bela": 20252, - "analyzing": 20253, - "parting": 20254, - "##gor": 20255, - "awakened": 20256, - "##isman": 20257, - "huddled": 20258, - "secrecy": 20259, - "##kling": 20260, - "hush": 20261, - "gentry": 20262, - "540": 20263, - "dungeons": 20264, - "##ego": 20265, - "coasts": 20266, - "##utz": 20267, - "sacrificed": 20268, - "##chule": 20269, - "landowner": 20270, - "mutually": 20271, - "prevalence": 20272, - "programmer": 20273, - "adolescent": 20274, - "disrupted": 20275, - "seaside": 20276, - "gee": 20277, - "trusts": 20278, - "vamp": 20279, - "georgie": 20280, - "##nesian": 20281, - "##iol": 20282, - "schedules": 20283, - "sindh": 20284, - "##market": 20285, - "etched": 20286, - "hm": 20287, - "sparse": 20288, - "bey": 20289, - "beaux": 20290, - "scratching": 20291, - "gliding": 20292, - "unidentified": 20293, - "216": 20294, - "collaborating": 20295, - "gems": 20296, - "jesuits": 20297, - "oro": 20298, - "accumulation": 20299, - "shaping": 20300, - "mbe": 20301, - "anal": 20302, - "##xin": 20303, - "231": 20304, - "enthusiasts": 20305, - "newscast": 20306, - "##egan": 20307, - "janata": 20308, - "dewey": 20309, - "parkinson": 20310, - "179": 20311, 
- "ankara": 20312, - "biennial": 20313, - "towering": 20314, - "dd": 20315, - "inconsistent": 20316, - "950": 20317, - "##chet": 20318, - "thriving": 20319, - "terminate": 20320, - "cabins": 20321, - "furiously": 20322, - "eats": 20323, - "advocating": 20324, - "donkey": 20325, - "marley": 20326, - "muster": 20327, - "phyllis": 20328, - "leiden": 20329, - "##user": 20330, - "grassland": 20331, - "glittering": 20332, - "iucn": 20333, - "loneliness": 20334, - "217": 20335, - "memorandum": 20336, - "armenians": 20337, - "##ddle": 20338, - "popularized": 20339, - "rhodesia": 20340, - "60s": 20341, - "lame": 20342, - "##illon": 20343, - "sans": 20344, - "bikini": 20345, - "header": 20346, - "orbits": 20347, - "##xx": 20348, - "##finger": 20349, - "##ulator": 20350, - "sharif": 20351, - "spines": 20352, - "biotechnology": 20353, - "strolled": 20354, - "naughty": 20355, - "yates": 20356, - "##wire": 20357, - "fremantle": 20358, - "milo": 20359, - "##mour": 20360, - "abducted": 20361, - "removes": 20362, - "##atin": 20363, - "humming": 20364, - "wonderland": 20365, - "##chrome": 20366, - "##ester": 20367, - "hume": 20368, - "pivotal": 20369, - "##rates": 20370, - "armand": 20371, - "grams": 20372, - "believers": 20373, - "elector": 20374, - "rte": 20375, - "apron": 20376, - "bis": 20377, - "scraped": 20378, - "##yria": 20379, - "endorsement": 20380, - "initials": 20381, - "##llation": 20382, - "eps": 20383, - "dotted": 20384, - "hints": 20385, - "buzzing": 20386, - "emigration": 20387, - "nearer": 20388, - "##tom": 20389, - "indicators": 20390, - "##ulu": 20391, - "coarse": 20392, - "neutron": 20393, - "protectorate": 20394, - "##uze": 20395, - "directional": 20396, - "exploits": 20397, - "pains": 20398, - "loire": 20399, - "1830s": 20400, - "proponents": 20401, - "guggenheim": 20402, - "rabbits": 20403, - "ritchie": 20404, - "305": 20405, - "hectare": 20406, - "inputs": 20407, - "hutton": 20408, - "##raz": 20409, - "verify": 20410, - "##ako": 20411, - "boilers": 20412, - "longitude": 20413, - "##lev": 20414, - "skeletal": 20415, - "yer": 20416, - "emilia": 20417, - "citrus": 20418, - "compromised": 20419, - "##gau": 20420, - "pokemon": 20421, - "prescription": 20422, - "paragraph": 20423, - "eduard": 20424, - "cadillac": 20425, - "attire": 20426, - "categorized": 20427, - "kenyan": 20428, - "weddings": 20429, - "charley": 20430, - "##bourg": 20431, - "entertain": 20432, - "monmouth": 20433, - "##lles": 20434, - "nutrients": 20435, - "davey": 20436, - "mesh": 20437, - "incentive": 20438, - "practised": 20439, - "ecosystems": 20440, - "kemp": 20441, - "subdued": 20442, - "overheard": 20443, - "##rya": 20444, - "bodily": 20445, - "maxim": 20446, - "##nius": 20447, - "apprenticeship": 20448, - "ursula": 20449, - "##fight": 20450, - "lodged": 20451, - "rug": 20452, - "silesian": 20453, - "unconstitutional": 20454, - "patel": 20455, - "inspected": 20456, - "coyote": 20457, - "unbeaten": 20458, - "##hak": 20459, - "34th": 20460, - "disruption": 20461, - "convict": 20462, - "parcel": 20463, - "##cl": 20464, - "##nham": 20465, - "collier": 20466, - "implicated": 20467, - "mallory": 20468, - "##iac": 20469, - "##lab": 20470, - "susannah": 20471, - "winkler": 20472, - "##rber": 20473, - "shia": 20474, - "phelps": 20475, - "sediments": 20476, - "graphical": 20477, - "robotic": 20478, - "##sner": 20479, - "adulthood": 20480, - "mart": 20481, - "smoked": 20482, - "##isto": 20483, - "kathryn": 20484, - "clarified": 20485, - "##aran": 20486, - "divides": 20487, - "convictions": 20488, - "oppression": 20489, - 
"pausing": 20490, - "burying": 20491, - "##mt": 20492, - "federico": 20493, - "mathias": 20494, - "eileen": 20495, - "##tana": 20496, - "kite": 20497, - "hunched": 20498, - "##acies": 20499, - "189": 20500, - "##atz": 20501, - "disadvantage": 20502, - "liza": 20503, - "kinetic": 20504, - "greedy": 20505, - "paradox": 20506, - "yokohama": 20507, - "dowager": 20508, - "trunks": 20509, - "ventured": 20510, - "##gement": 20511, - "gupta": 20512, - "vilnius": 20513, - "olaf": 20514, - "##thest": 20515, - "crimean": 20516, - "hopper": 20517, - "##ej": 20518, - "progressively": 20519, - "arturo": 20520, - "mouthed": 20521, - "arrondissement": 20522, - "##fusion": 20523, - "rubin": 20524, - "simulcast": 20525, - "oceania": 20526, - "##orum": 20527, - "##stra": 20528, - "##rred": 20529, - "busiest": 20530, - "intensely": 20531, - "navigator": 20532, - "cary": 20533, - "##vine": 20534, - "##hini": 20535, - "##bies": 20536, - "fife": 20537, - "rowe": 20538, - "rowland": 20539, - "posing": 20540, - "insurgents": 20541, - "shafts": 20542, - "lawsuits": 20543, - "activate": 20544, - "conor": 20545, - "inward": 20546, - "culturally": 20547, - "garlic": 20548, - "265": 20549, - "##eering": 20550, - "eclectic": 20551, - "##hui": 20552, - "##kee": 20553, - "##nl": 20554, - "furrowed": 20555, - "vargas": 20556, - "meteorological": 20557, - "rendezvous": 20558, - "##aus": 20559, - "culinary": 20560, - "commencement": 20561, - "##dition": 20562, - "quota": 20563, - "##notes": 20564, - "mommy": 20565, - "salaries": 20566, - "overlapping": 20567, - "mule": 20568, - "##iology": 20569, - "##mology": 20570, - "sums": 20571, - "wentworth": 20572, - "##isk": 20573, - "##zione": 20574, - "mainline": 20575, - "subgroup": 20576, - "##illy": 20577, - "hack": 20578, - "plaintiff": 20579, - "verdi": 20580, - "bulb": 20581, - "differentiation": 20582, - "engagements": 20583, - "multinational": 20584, - "supplemented": 20585, - "bertrand": 20586, - "caller": 20587, - "regis": 20588, - "##naire": 20589, - "##sler": 20590, - "##arts": 20591, - "##imated": 20592, - "blossom": 20593, - "propagation": 20594, - "kilometer": 20595, - "viaduct": 20596, - "vineyards": 20597, - "##uate": 20598, - "beckett": 20599, - "optimization": 20600, - "golfer": 20601, - "songwriters": 20602, - "seminal": 20603, - "semitic": 20604, - "thud": 20605, - "volatile": 20606, - "evolving": 20607, - "ridley": 20608, - "##wley": 20609, - "trivial": 20610, - "distributions": 20611, - "scandinavia": 20612, - "jiang": 20613, - "##ject": 20614, - "wrestled": 20615, - "insistence": 20616, - "##dio": 20617, - "emphasizes": 20618, - "napkin": 20619, - "##ods": 20620, - "adjunct": 20621, - "rhyme": 20622, - "##ricted": 20623, - "##eti": 20624, - "hopeless": 20625, - "surrounds": 20626, - "tremble": 20627, - "32nd": 20628, - "smoky": 20629, - "##ntly": 20630, - "oils": 20631, - "medicinal": 20632, - "padded": 20633, - "steer": 20634, - "wilkes": 20635, - "219": 20636, - "255": 20637, - "concessions": 20638, - "hue": 20639, - "uniquely": 20640, - "blinded": 20641, - "landon": 20642, - "yahoo": 20643, - "##lane": 20644, - "hendrix": 20645, - "commemorating": 20646, - "dex": 20647, - "specify": 20648, - "chicks": 20649, - "##ggio": 20650, - "intercity": 20651, - "1400": 20652, - "morley": 20653, - "##torm": 20654, - "highlighting": 20655, - "##oting": 20656, - "pang": 20657, - "oblique": 20658, - "stalled": 20659, - "##liner": 20660, - "flirting": 20661, - "newborn": 20662, - "1769": 20663, - "bishopric": 20664, - "shaved": 20665, - "232": 20666, - "currie": 20667, - 
"##ush": 20668, - "dharma": 20669, - "spartan": 20670, - "##ooped": 20671, - "favorites": 20672, - "smug": 20673, - "novella": 20674, - "sirens": 20675, - "abusive": 20676, - "creations": 20677, - "espana": 20678, - "##lage": 20679, - "paradigm": 20680, - "semiconductor": 20681, - "sheen": 20682, - "##rdo": 20683, - "##yen": 20684, - "##zak": 20685, - "nrl": 20686, - "renew": 20687, - "##pose": 20688, - "##tur": 20689, - "adjutant": 20690, - "marches": 20691, - "norma": 20692, - "##enity": 20693, - "ineffective": 20694, - "weimar": 20695, - "grunt": 20696, - "##gat": 20697, - "lordship": 20698, - "plotting": 20699, - "expenditure": 20700, - "infringement": 20701, - "lbs": 20702, - "refrain": 20703, - "av": 20704, - "mimi": 20705, - "mistakenly": 20706, - "postmaster": 20707, - "1771": 20708, - "##bara": 20709, - "ras": 20710, - "motorsports": 20711, - "tito": 20712, - "199": 20713, - "subjective": 20714, - "##zza": 20715, - "bully": 20716, - "stew": 20717, - "##kaya": 20718, - "prescott": 20719, - "1a": 20720, - "##raphic": 20721, - "##zam": 20722, - "bids": 20723, - "styling": 20724, - "paranormal": 20725, - "reeve": 20726, - "sneaking": 20727, - "exploding": 20728, - "katz": 20729, - "akbar": 20730, - "migrant": 20731, - "syllables": 20732, - "indefinitely": 20733, - "##ogical": 20734, - "destroys": 20735, - "replaces": 20736, - "applause": 20737, - "##phine": 20738, - "pest": 20739, - "##fide": 20740, - "218": 20741, - "articulated": 20742, - "bertie": 20743, - "##thing": 20744, - "##cars": 20745, - "##ptic": 20746, - "courtroom": 20747, - "crowley": 20748, - "aesthetics": 20749, - "cummings": 20750, - "tehsil": 20751, - "hormones": 20752, - "titanic": 20753, - "dangerously": 20754, - "##ibe": 20755, - "stadion": 20756, - "jaenelle": 20757, - "auguste": 20758, - "ciudad": 20759, - "##chu": 20760, - "mysore": 20761, - "partisans": 20762, - "##sio": 20763, - "lucan": 20764, - "philipp": 20765, - "##aly": 20766, - "debating": 20767, - "henley": 20768, - "interiors": 20769, - "##rano": 20770, - "##tious": 20771, - "homecoming": 20772, - "beyonce": 20773, - "usher": 20774, - "henrietta": 20775, - "prepares": 20776, - "weeds": 20777, - "##oman": 20778, - "ely": 20779, - "plucked": 20780, - "##pire": 20781, - "##dable": 20782, - "luxurious": 20783, - "##aq": 20784, - "artifact": 20785, - "password": 20786, - "pasture": 20787, - "juno": 20788, - "maddy": 20789, - "minsk": 20790, - "##dder": 20791, - "##ologies": 20792, - "##rone": 20793, - "assessments": 20794, - "martian": 20795, - "royalist": 20796, - "1765": 20797, - "examines": 20798, - "##mani": 20799, - "##rge": 20800, - "nino": 20801, - "223": 20802, - "parry": 20803, - "scooped": 20804, - "relativity": 20805, - "##eli": 20806, - "##uting": 20807, - "##cao": 20808, - "congregational": 20809, - "noisy": 20810, - "traverse": 20811, - "##agawa": 20812, - "strikeouts": 20813, - "nickelodeon": 20814, - "obituary": 20815, - "transylvania": 20816, - "binds": 20817, - "depictions": 20818, - "polk": 20819, - "trolley": 20820, - "##yed": 20821, - "##lard": 20822, - "breeders": 20823, - "##under": 20824, - "dryly": 20825, - "hokkaido": 20826, - "1762": 20827, - "strengths": 20828, - "stacks": 20829, - "bonaparte": 20830, - "connectivity": 20831, - "neared": 20832, - "prostitutes": 20833, - "stamped": 20834, - "anaheim": 20835, - "gutierrez": 20836, - "sinai": 20837, - "##zzling": 20838, - "bram": 20839, - "fresno": 20840, - "madhya": 20841, - "##86": 20842, - "proton": 20843, - "##lena": 20844, - "##llum": 20845, - "##phon": 20846, - "reelected": 
20847, - "wanda": 20848, - "##anus": 20849, - "##lb": 20850, - "ample": 20851, - "distinguishing": 20852, - "##yler": 20853, - "grasping": 20854, - "sermons": 20855, - "tomato": 20856, - "bland": 20857, - "stimulation": 20858, - "avenues": 20859, - "##eux": 20860, - "spreads": 20861, - "scarlett": 20862, - "fern": 20863, - "pentagon": 20864, - "assert": 20865, - "baird": 20866, - "chesapeake": 20867, - "ir": 20868, - "calmed": 20869, - "distortion": 20870, - "fatalities": 20871, - "##olis": 20872, - "correctional": 20873, - "pricing": 20874, - "##astic": 20875, - "##gina": 20876, - "prom": 20877, - "dammit": 20878, - "ying": 20879, - "collaborate": 20880, - "##chia": 20881, - "welterweight": 20882, - "33rd": 20883, - "pointer": 20884, - "substitution": 20885, - "bonded": 20886, - "umpire": 20887, - "communicating": 20888, - "multitude": 20889, - "paddle": 20890, - "##obe": 20891, - "federally": 20892, - "intimacy": 20893, - "##insky": 20894, - "betray": 20895, - "ssr": 20896, - "##lett": 20897, - "##lean": 20898, - "##lves": 20899, - "##therapy": 20900, - "airbus": 20901, - "##tery": 20902, - "functioned": 20903, - "ud": 20904, - "bearer": 20905, - "biomedical": 20906, - "netflix": 20907, - "##hire": 20908, - "##nca": 20909, - "condom": 20910, - "brink": 20911, - "ik": 20912, - "##nical": 20913, - "macy": 20914, - "##bet": 20915, - "flap": 20916, - "gma": 20917, - "experimented": 20918, - "jelly": 20919, - "lavender": 20920, - "##icles": 20921, - "##ulia": 20922, - "munro": 20923, - "##mian": 20924, - "##tial": 20925, - "rye": 20926, - "##rle": 20927, - "60th": 20928, - "gigs": 20929, - "hottest": 20930, - "rotated": 20931, - "predictions": 20932, - "fuji": 20933, - "bu": 20934, - "##erence": 20935, - "##omi": 20936, - "barangay": 20937, - "##fulness": 20938, - "##sas": 20939, - "clocks": 20940, - "##rwood": 20941, - "##liness": 20942, - "cereal": 20943, - "roe": 20944, - "wight": 20945, - "decker": 20946, - "uttered": 20947, - "babu": 20948, - "onion": 20949, - "xml": 20950, - "forcibly": 20951, - "##df": 20952, - "petra": 20953, - "sarcasm": 20954, - "hartley": 20955, - "peeled": 20956, - "storytelling": 20957, - "##42": 20958, - "##xley": 20959, - "##ysis": 20960, - "##ffa": 20961, - "fibre": 20962, - "kiel": 20963, - "auditor": 20964, - "fig": 20965, - "harald": 20966, - "greenville": 20967, - "##berries": 20968, - "geographically": 20969, - "nell": 20970, - "quartz": 20971, - "##athic": 20972, - "cemeteries": 20973, - "##lr": 20974, - "crossings": 20975, - "nah": 20976, - "holloway": 20977, - "reptiles": 20978, - "chun": 20979, - "sichuan": 20980, - "snowy": 20981, - "660": 20982, - "corrections": 20983, - "##ivo": 20984, - "zheng": 20985, - "ambassadors": 20986, - "blacksmith": 20987, - "fielded": 20988, - "fluids": 20989, - "hardcover": 20990, - "turnover": 20991, - "medications": 20992, - "melvin": 20993, - "academies": 20994, - "##erton": 20995, - "ro": 20996, - "roach": 20997, - "absorbing": 20998, - "spaniards": 20999, - "colton": 21000, - "##founded": 21001, - "outsider": 21002, - "espionage": 21003, - "kelsey": 21004, - "245": 21005, - "edible": 21006, - "##ulf": 21007, - "dora": 21008, - "establishes": 21009, - "##sham": 21010, - "##tries": 21011, - "contracting": 21012, - "##tania": 21013, - "cinematic": 21014, - "costello": 21015, - "nesting": 21016, - "##uron": 21017, - "connolly": 21018, - "duff": 21019, - "##nology": 21020, - "mma": 21021, - "##mata": 21022, - "fergus": 21023, - "sexes": 21024, - "gi": 21025, - "optics": 21026, - "spectator": 21027, - "woodstock": 21028, 
- "banning": 21029, - "##hee": 21030, - "##fle": 21031, - "differentiate": 21032, - "outfielder": 21033, - "refinery": 21034, - "226": 21035, - "312": 21036, - "gerhard": 21037, - "horde": 21038, - "lair": 21039, - "drastically": 21040, - "##udi": 21041, - "landfall": 21042, - "##cheng": 21043, - "motorsport": 21044, - "odi": 21045, - "##achi": 21046, - "predominant": 21047, - "quay": 21048, - "skins": 21049, - "##ental": 21050, - "edna": 21051, - "harshly": 21052, - "complementary": 21053, - "murdering": 21054, - "##aves": 21055, - "wreckage": 21056, - "##90": 21057, - "ono": 21058, - "outstretched": 21059, - "lennox": 21060, - "munitions": 21061, - "galen": 21062, - "reconcile": 21063, - "470": 21064, - "scalp": 21065, - "bicycles": 21066, - "gillespie": 21067, - "questionable": 21068, - "rosenberg": 21069, - "guillermo": 21070, - "hostel": 21071, - "jarvis": 21072, - "kabul": 21073, - "volvo": 21074, - "opium": 21075, - "yd": 21076, - "##twined": 21077, - "abuses": 21078, - "decca": 21079, - "outpost": 21080, - "##cino": 21081, - "sensible": 21082, - "neutrality": 21083, - "##64": 21084, - "ponce": 21085, - "anchorage": 21086, - "atkins": 21087, - "turrets": 21088, - "inadvertently": 21089, - "disagree": 21090, - "libre": 21091, - "vodka": 21092, - "reassuring": 21093, - "weighs": 21094, - "##yal": 21095, - "glide": 21096, - "jumper": 21097, - "ceilings": 21098, - "repertory": 21099, - "outs": 21100, - "stain": 21101, - "##bial": 21102, - "envy": 21103, - "##ucible": 21104, - "smashing": 21105, - "heightened": 21106, - "policing": 21107, - "hyun": 21108, - "mixes": 21109, - "lai": 21110, - "prima": 21111, - "##ples": 21112, - "celeste": 21113, - "##bina": 21114, - "lucrative": 21115, - "intervened": 21116, - "kc": 21117, - "manually": 21118, - "##rned": 21119, - "stature": 21120, - "staffed": 21121, - "bun": 21122, - "bastards": 21123, - "nairobi": 21124, - "priced": 21125, - "##auer": 21126, - "thatcher": 21127, - "##kia": 21128, - "tripped": 21129, - "comune": 21130, - "##ogan": 21131, - "##pled": 21132, - "brasil": 21133, - "incentives": 21134, - "emanuel": 21135, - "hereford": 21136, - "musica": 21137, - "##kim": 21138, - "benedictine": 21139, - "biennale": 21140, - "##lani": 21141, - "eureka": 21142, - "gardiner": 21143, - "rb": 21144, - "knocks": 21145, - "sha": 21146, - "##ael": 21147, - "##elled": 21148, - "##onate": 21149, - "efficacy": 21150, - "ventura": 21151, - "masonic": 21152, - "sanford": 21153, - "maize": 21154, - "leverage": 21155, - "##feit": 21156, - "capacities": 21157, - "santana": 21158, - "##aur": 21159, - "novelty": 21160, - "vanilla": 21161, - "##cter": 21162, - "##tour": 21163, - "benin": 21164, - "##oir": 21165, - "##rain": 21166, - "neptune": 21167, - "drafting": 21168, - "tallinn": 21169, - "##cable": 21170, - "humiliation": 21171, - "##boarding": 21172, - "schleswig": 21173, - "fabian": 21174, - "bernardo": 21175, - "liturgy": 21176, - "spectacle": 21177, - "sweeney": 21178, - "pont": 21179, - "routledge": 21180, - "##tment": 21181, - "cosmos": 21182, - "ut": 21183, - "hilt": 21184, - "sleek": 21185, - "universally": 21186, - "##eville": 21187, - "##gawa": 21188, - "typed": 21189, - "##dry": 21190, - "favors": 21191, - "allegheny": 21192, - "glaciers": 21193, - "##rly": 21194, - "recalling": 21195, - "aziz": 21196, - "##log": 21197, - "parasite": 21198, - "requiem": 21199, - "auf": 21200, - "##berto": 21201, - "##llin": 21202, - "illumination": 21203, - "##breaker": 21204, - "##issa": 21205, - "festivities": 21206, - "bows": 21207, - "govern": 21208, - 
"vibe": 21209, - "vp": 21210, - "333": 21211, - "sprawled": 21212, - "larson": 21213, - "pilgrim": 21214, - "bwf": 21215, - "leaping": 21216, - "##rts": 21217, - "##ssel": 21218, - "alexei": 21219, - "greyhound": 21220, - "hoarse": 21221, - "##dler": 21222, - "##oration": 21223, - "seneca": 21224, - "##cule": 21225, - "gaping": 21226, - "##ulously": 21227, - "##pura": 21228, - "cinnamon": 21229, - "##gens": 21230, - "##rricular": 21231, - "craven": 21232, - "fantasies": 21233, - "houghton": 21234, - "engined": 21235, - "reigned": 21236, - "dictator": 21237, - "supervising": 21238, - "##oris": 21239, - "bogota": 21240, - "commentaries": 21241, - "unnatural": 21242, - "fingernails": 21243, - "spirituality": 21244, - "tighten": 21245, - "##tm": 21246, - "canadiens": 21247, - "protesting": 21248, - "intentional": 21249, - "cheers": 21250, - "sparta": 21251, - "##ytic": 21252, - "##iere": 21253, - "##zine": 21254, - "widen": 21255, - "belgarath": 21256, - "controllers": 21257, - "dodd": 21258, - "iaaf": 21259, - "navarre": 21260, - "##ication": 21261, - "defect": 21262, - "squire": 21263, - "steiner": 21264, - "whisky": 21265, - "##mins": 21266, - "560": 21267, - "inevitably": 21268, - "tome": 21269, - "##gold": 21270, - "chew": 21271, - "##uid": 21272, - "##lid": 21273, - "elastic": 21274, - "##aby": 21275, - "streaked": 21276, - "alliances": 21277, - "jailed": 21278, - "regal": 21279, - "##ined": 21280, - "##phy": 21281, - "czechoslovak": 21282, - "narration": 21283, - "absently": 21284, - "##uld": 21285, - "bluegrass": 21286, - "guangdong": 21287, - "quran": 21288, - "criticizing": 21289, - "hose": 21290, - "hari": 21291, - "##liest": 21292, - "##owa": 21293, - "skier": 21294, - "streaks": 21295, - "deploy": 21296, - "##lom": 21297, - "raft": 21298, - "bose": 21299, - "dialed": 21300, - "huff": 21301, - "##eira": 21302, - "haifa": 21303, - "simplest": 21304, - "bursting": 21305, - "endings": 21306, - "ib": 21307, - "sultanate": 21308, - "##titled": 21309, - "franks": 21310, - "whitman": 21311, - "ensures": 21312, - "sven": 21313, - "##ggs": 21314, - "collaborators": 21315, - "forster": 21316, - "organising": 21317, - "ui": 21318, - "banished": 21319, - "napier": 21320, - "injustice": 21321, - "teller": 21322, - "layered": 21323, - "thump": 21324, - "##otti": 21325, - "roc": 21326, - "battleships": 21327, - "evidenced": 21328, - "fugitive": 21329, - "sadie": 21330, - "robotics": 21331, - "##roud": 21332, - "equatorial": 21333, - "geologist": 21334, - "##iza": 21335, - "yielding": 21336, - "##bron": 21337, - "##sr": 21338, - "internationale": 21339, - "mecca": 21340, - "##diment": 21341, - "sbs": 21342, - "skyline": 21343, - "toad": 21344, - "uploaded": 21345, - "reflective": 21346, - "undrafted": 21347, - "lal": 21348, - "leafs": 21349, - "bayern": 21350, - "##dai": 21351, - "lakshmi": 21352, - "shortlisted": 21353, - "##stick": 21354, - "##wicz": 21355, - "camouflage": 21356, - "donate": 21357, - "af": 21358, - "christi": 21359, - "lau": 21360, - "##acio": 21361, - "disclosed": 21362, - "nemesis": 21363, - "1761": 21364, - "assemble": 21365, - "straining": 21366, - "northamptonshire": 21367, - "tal": 21368, - "##asi": 21369, - "bernardino": 21370, - "premature": 21371, - "heidi": 21372, - "42nd": 21373, - "coefficients": 21374, - "galactic": 21375, - "reproduce": 21376, - "buzzed": 21377, - "sensations": 21378, - "zionist": 21379, - "monsieur": 21380, - "myrtle": 21381, - "##eme": 21382, - "archery": 21383, - "strangled": 21384, - "musically": 21385, - "viewpoint": 21386, - "antiquities": 
21387, - "bei": 21388, - "trailers": 21389, - "seahawks": 21390, - "cured": 21391, - "pee": 21392, - "preferring": 21393, - "tasmanian": 21394, - "lange": 21395, - "sul": 21396, - "##mail": 21397, - "##working": 21398, - "colder": 21399, - "overland": 21400, - "lucivar": 21401, - "massey": 21402, - "gatherings": 21403, - "haitian": 21404, - "##smith": 21405, - "disapproval": 21406, - "flaws": 21407, - "##cco": 21408, - "##enbach": 21409, - "1766": 21410, - "npr": 21411, - "##icular": 21412, - "boroughs": 21413, - "creole": 21414, - "forums": 21415, - "techno": 21416, - "1755": 21417, - "dent": 21418, - "abdominal": 21419, - "streetcar": 21420, - "##eson": 21421, - "##stream": 21422, - "procurement": 21423, - "gemini": 21424, - "predictable": 21425, - "##tya": 21426, - "acheron": 21427, - "christoph": 21428, - "feeder": 21429, - "fronts": 21430, - "vendor": 21431, - "bernhard": 21432, - "jammu": 21433, - "tumors": 21434, - "slang": 21435, - "##uber": 21436, - "goaltender": 21437, - "twists": 21438, - "curving": 21439, - "manson": 21440, - "vuelta": 21441, - "mer": 21442, - "peanut": 21443, - "confessions": 21444, - "pouch": 21445, - "unpredictable": 21446, - "allowance": 21447, - "theodor": 21448, - "vascular": 21449, - "##factory": 21450, - "bala": 21451, - "authenticity": 21452, - "metabolic": 21453, - "coughing": 21454, - "nanjing": 21455, - "##cea": 21456, - "pembroke": 21457, - "##bard": 21458, - "splendid": 21459, - "36th": 21460, - "ff": 21461, - "hourly": 21462, - "##ahu": 21463, - "elmer": 21464, - "handel": 21465, - "##ivate": 21466, - "awarding": 21467, - "thrusting": 21468, - "dl": 21469, - "experimentation": 21470, - "##hesion": 21471, - "##46": 21472, - "caressed": 21473, - "entertained": 21474, - "steak": 21475, - "##rangle": 21476, - "biologist": 21477, - "orphans": 21478, - "baroness": 21479, - "oyster": 21480, - "stepfather": 21481, - "##dridge": 21482, - "mirage": 21483, - "reefs": 21484, - "speeding": 21485, - "##31": 21486, - "barons": 21487, - "1764": 21488, - "227": 21489, - "inhabit": 21490, - "preached": 21491, - "repealed": 21492, - "##tral": 21493, - "honoring": 21494, - "boogie": 21495, - "captives": 21496, - "administer": 21497, - "johanna": 21498, - "##imate": 21499, - "gel": 21500, - "suspiciously": 21501, - "1767": 21502, - "sobs": 21503, - "##dington": 21504, - "backbone": 21505, - "hayward": 21506, - "garry": 21507, - "##folding": 21508, - "##nesia": 21509, - "maxi": 21510, - "##oof": 21511, - "##ppe": 21512, - "ellison": 21513, - "galileo": 21514, - "##stand": 21515, - "crimea": 21516, - "frenzy": 21517, - "amour": 21518, - "bumper": 21519, - "matrices": 21520, - "natalia": 21521, - "baking": 21522, - "garth": 21523, - "palestinians": 21524, - "##grove": 21525, - "smack": 21526, - "conveyed": 21527, - "ensembles": 21528, - "gardening": 21529, - "##manship": 21530, - "##rup": 21531, - "##stituting": 21532, - "1640": 21533, - "harvesting": 21534, - "topography": 21535, - "jing": 21536, - "shifters": 21537, - "dormitory": 21538, - "##carriage": 21539, - "##lston": 21540, - "ist": 21541, - "skulls": 21542, - "##stadt": 21543, - "dolores": 21544, - "jewellery": 21545, - "sarawak": 21546, - "##wai": 21547, - "##zier": 21548, - "fences": 21549, - "christy": 21550, - "confinement": 21551, - "tumbling": 21552, - "credibility": 21553, - "fir": 21554, - "stench": 21555, - "##bria": 21556, - "##plication": 21557, - "##nged": 21558, - "##sam": 21559, - "virtues": 21560, - "##belt": 21561, - "marjorie": 21562, - "pba": 21563, - "##eem": 21564, - "##made": 21565, - 
"celebrates": 21566, - "schooner": 21567, - "agitated": 21568, - "barley": 21569, - "fulfilling": 21570, - "anthropologist": 21571, - "##pro": 21572, - "restrict": 21573, - "novi": 21574, - "regulating": 21575, - "##nent": 21576, - "padres": 21577, - "##rani": 21578, - "##hesive": 21579, - "loyola": 21580, - "tabitha": 21581, - "milky": 21582, - "olson": 21583, - "proprietor": 21584, - "crambidae": 21585, - "guarantees": 21586, - "intercollegiate": 21587, - "ljubljana": 21588, - "hilda": 21589, - "##sko": 21590, - "ignorant": 21591, - "hooded": 21592, - "##lts": 21593, - "sardinia": 21594, - "##lidae": 21595, - "##vation": 21596, - "frontman": 21597, - "privileged": 21598, - "witchcraft": 21599, - "##gp": 21600, - "jammed": 21601, - "laude": 21602, - "poking": 21603, - "##than": 21604, - "bracket": 21605, - "amazement": 21606, - "yunnan": 21607, - "##erus": 21608, - "maharaja": 21609, - "linnaeus": 21610, - "264": 21611, - "commissioning": 21612, - "milano": 21613, - "peacefully": 21614, - "##logies": 21615, - "akira": 21616, - "rani": 21617, - "regulator": 21618, - "##36": 21619, - "grasses": 21620, - "##rance": 21621, - "luzon": 21622, - "crows": 21623, - "compiler": 21624, - "gretchen": 21625, - "seaman": 21626, - "edouard": 21627, - "tab": 21628, - "buccaneers": 21629, - "ellington": 21630, - "hamlets": 21631, - "whig": 21632, - "socialists": 21633, - "##anto": 21634, - "directorial": 21635, - "easton": 21636, - "mythological": 21637, - "##kr": 21638, - "##vary": 21639, - "rhineland": 21640, - "semantic": 21641, - "taut": 21642, - "dune": 21643, - "inventions": 21644, - "succeeds": 21645, - "##iter": 21646, - "replication": 21647, - "branched": 21648, - "##pired": 21649, - "jul": 21650, - "prosecuted": 21651, - "kangaroo": 21652, - "penetrated": 21653, - "##avian": 21654, - "middlesbrough": 21655, - "doses": 21656, - "bleak": 21657, - "madam": 21658, - "predatory": 21659, - "relentless": 21660, - "##vili": 21661, - "reluctance": 21662, - "##vir": 21663, - "hailey": 21664, - "crore": 21665, - "silvery": 21666, - "1759": 21667, - "monstrous": 21668, - "swimmers": 21669, - "transmissions": 21670, - "hawthorn": 21671, - "informing": 21672, - "##eral": 21673, - "toilets": 21674, - "caracas": 21675, - "crouch": 21676, - "kb": 21677, - "##sett": 21678, - "295": 21679, - "cartel": 21680, - "hadley": 21681, - "##aling": 21682, - "alexia": 21683, - "yvonne": 21684, - "##biology": 21685, - "cinderella": 21686, - "eton": 21687, - "superb": 21688, - "blizzard": 21689, - "stabbing": 21690, - "industrialist": 21691, - "maximus": 21692, - "##gm": 21693, - "##orus": 21694, - "groves": 21695, - "maud": 21696, - "clade": 21697, - "oversized": 21698, - "comedic": 21699, - "##bella": 21700, - "rosen": 21701, - "nomadic": 21702, - "fulham": 21703, - "montane": 21704, - "beverages": 21705, - "galaxies": 21706, - "redundant": 21707, - "swarm": 21708, - "##rot": 21709, - "##folia": 21710, - "##llis": 21711, - "buckinghamshire": 21712, - "fen": 21713, - "bearings": 21714, - "bahadur": 21715, - "##rom": 21716, - "gilles": 21717, - "phased": 21718, - "dynamite": 21719, - "faber": 21720, - "benoit": 21721, - "vip": 21722, - "##ount": 21723, - "##wd": 21724, - "booking": 21725, - "fractured": 21726, - "tailored": 21727, - "anya": 21728, - "spices": 21729, - "westwood": 21730, - "cairns": 21731, - "auditions": 21732, - "inflammation": 21733, - "steamed": 21734, - "##rocity": 21735, - "##acion": 21736, - "##urne": 21737, - "skyla": 21738, - "thereof": 21739, - "watford": 21740, - "torment": 21741, - "archdeacon": 
21742, - "transforms": 21743, - "lulu": 21744, - "demeanor": 21745, - "fucked": 21746, - "serge": 21747, - "##sor": 21748, - "mckenna": 21749, - "minas": 21750, - "entertainer": 21751, - "##icide": 21752, - "caress": 21753, - "originate": 21754, - "residue": 21755, - "##sty": 21756, - "1740": 21757, - "##ilised": 21758, - "##org": 21759, - "beech": 21760, - "##wana": 21761, - "subsidies": 21762, - "##ghton": 21763, - "emptied": 21764, - "gladstone": 21765, - "ru": 21766, - "firefighters": 21767, - "voodoo": 21768, - "##rcle": 21769, - "het": 21770, - "nightingale": 21771, - "tamara": 21772, - "edmond": 21773, - "ingredient": 21774, - "weaknesses": 21775, - "silhouette": 21776, - "285": 21777, - "compatibility": 21778, - "withdrawing": 21779, - "hampson": 21780, - "##mona": 21781, - "anguish": 21782, - "giggling": 21783, - "##mber": 21784, - "bookstore": 21785, - "##jiang": 21786, - "southernmost": 21787, - "tilting": 21788, - "##vance": 21789, - "bai": 21790, - "economical": 21791, - "rf": 21792, - "briefcase": 21793, - "dreadful": 21794, - "hinted": 21795, - "projections": 21796, - "shattering": 21797, - "totaling": 21798, - "##rogate": 21799, - "analogue": 21800, - "indicted": 21801, - "periodical": 21802, - "fullback": 21803, - "##dman": 21804, - "haynes": 21805, - "##tenberg": 21806, - "##ffs": 21807, - "##ishment": 21808, - "1745": 21809, - "thirst": 21810, - "stumble": 21811, - "penang": 21812, - "vigorous": 21813, - "##ddling": 21814, - "##kor": 21815, - "##lium": 21816, - "octave": 21817, - "##ove": 21818, - "##enstein": 21819, - "##inen": 21820, - "##ones": 21821, - "siberian": 21822, - "##uti": 21823, - "cbn": 21824, - "repeal": 21825, - "swaying": 21826, - "##vington": 21827, - "khalid": 21828, - "tanaka": 21829, - "unicorn": 21830, - "otago": 21831, - "plastered": 21832, - "lobe": 21833, - "riddle": 21834, - "##rella": 21835, - "perch": 21836, - "##ishing": 21837, - "croydon": 21838, - "filtered": 21839, - "graeme": 21840, - "tripoli": 21841, - "##ossa": 21842, - "crocodile": 21843, - "##chers": 21844, - "sufi": 21845, - "mined": 21846, - "##tung": 21847, - "inferno": 21848, - "lsu": 21849, - "##phi": 21850, - "swelled": 21851, - "utilizes": 21852, - "£2": 21853, - "cale": 21854, - "periodicals": 21855, - "styx": 21856, - "hike": 21857, - "informally": 21858, - "coop": 21859, - "lund": 21860, - "##tidae": 21861, - "ala": 21862, - "hen": 21863, - "qui": 21864, - "transformations": 21865, - "disposed": 21866, - "sheath": 21867, - "chickens": 21868, - "##cade": 21869, - "fitzroy": 21870, - "sas": 21871, - "silesia": 21872, - "unacceptable": 21873, - "odisha": 21874, - "1650": 21875, - "sabrina": 21876, - "pe": 21877, - "spokane": 21878, - "ratios": 21879, - "athena": 21880, - "massage": 21881, - "shen": 21882, - "dilemma": 21883, - "##drum": 21884, - "##riz": 21885, - "##hul": 21886, - "corona": 21887, - "doubtful": 21888, - "niall": 21889, - "##pha": 21890, - "##bino": 21891, - "fines": 21892, - "cite": 21893, - "acknowledging": 21894, - "bangor": 21895, - "ballard": 21896, - "bathurst": 21897, - "##resh": 21898, - "huron": 21899, - "mustered": 21900, - "alzheimer": 21901, - "garments": 21902, - "kinase": 21903, - "tyre": 21904, - "warship": 21905, - "##cp": 21906, - "flashback": 21907, - "pulmonary": 21908, - "braun": 21909, - "cheat": 21910, - "kamal": 21911, - "cyclists": 21912, - "constructions": 21913, - "grenades": 21914, - "ndp": 21915, - "traveller": 21916, - "excuses": 21917, - "stomped": 21918, - "signalling": 21919, - "trimmed": 21920, - "futsal": 21921, - "mosques": 
21922, - "relevance": 21923, - "##wine": 21924, - "wta": 21925, - "##23": 21926, - "##vah": 21927, - "##lter": 21928, - "hoc": 21929, - "##riding": 21930, - "optimistic": 21931, - "##´s": 21932, - "deco": 21933, - "sim": 21934, - "interacting": 21935, - "rejecting": 21936, - "moniker": 21937, - "waterways": 21938, - "##ieri": 21939, - "##oku": 21940, - "mayors": 21941, - "gdansk": 21942, - "outnumbered": 21943, - "pearls": 21944, - "##ended": 21945, - "##hampton": 21946, - "fairs": 21947, - "totals": 21948, - "dominating": 21949, - "262": 21950, - "notions": 21951, - "stairway": 21952, - "compiling": 21953, - "pursed": 21954, - "commodities": 21955, - "grease": 21956, - "yeast": 21957, - "##jong": 21958, - "carthage": 21959, - "griffiths": 21960, - "residual": 21961, - "amc": 21962, - "contraction": 21963, - "laird": 21964, - "sapphire": 21965, - "##marine": 21966, - "##ivated": 21967, - "amalgamation": 21968, - "dissolve": 21969, - "inclination": 21970, - "lyle": 21971, - "packaged": 21972, - "altitudes": 21973, - "suez": 21974, - "canons": 21975, - "graded": 21976, - "lurched": 21977, - "narrowing": 21978, - "boasts": 21979, - "guise": 21980, - "wed": 21981, - "enrico": 21982, - "##ovsky": 21983, - "rower": 21984, - "scarred": 21985, - "bree": 21986, - "cub": 21987, - "iberian": 21988, - "protagonists": 21989, - "bargaining": 21990, - "proposing": 21991, - "trainers": 21992, - "voyages": 21993, - "vans": 21994, - "fishes": 21995, - "##aea": 21996, - "##ivist": 21997, - "##verance": 21998, - "encryption": 21999, - "artworks": 22000, - "kazan": 22001, - "sabre": 22002, - "cleopatra": 22003, - "hepburn": 22004, - "rotting": 22005, - "supremacy": 22006, - "mecklenburg": 22007, - "##brate": 22008, - "burrows": 22009, - "hazards": 22010, - "outgoing": 22011, - "flair": 22012, - "organizes": 22013, - "##ctions": 22014, - "scorpion": 22015, - "##usions": 22016, - "boo": 22017, - "234": 22018, - "chevalier": 22019, - "dunedin": 22020, - "slapping": 22021, - "##34": 22022, - "ineligible": 22023, - "pensions": 22024, - "##38": 22025, - "##omic": 22026, - "manufactures": 22027, - "emails": 22028, - "bismarck": 22029, - "238": 22030, - "weakening": 22031, - "blackish": 22032, - "ding": 22033, - "mcgee": 22034, - "quo": 22035, - "##rling": 22036, - "northernmost": 22037, - "xx": 22038, - "manpower": 22039, - "greed": 22040, - "sampson": 22041, - "clicking": 22042, - "##ange": 22043, - "##horpe": 22044, - "##inations": 22045, - "##roving": 22046, - "torre": 22047, - "##eptive": 22048, - "##moral": 22049, - "symbolism": 22050, - "38th": 22051, - "asshole": 22052, - "meritorious": 22053, - "outfits": 22054, - "splashed": 22055, - "biographies": 22056, - "sprung": 22057, - "astros": 22058, - "##tale": 22059, - "302": 22060, - "737": 22061, - "filly": 22062, - "raoul": 22063, - "nw": 22064, - "tokugawa": 22065, - "linden": 22066, - "clubhouse": 22067, - "##apa": 22068, - "tracts": 22069, - "romano": 22070, - "##pio": 22071, - "putin": 22072, - "tags": 22073, - "##note": 22074, - "chained": 22075, - "dickson": 22076, - "gunshot": 22077, - "moe": 22078, - "gunn": 22079, - "rashid": 22080, - "##tails": 22081, - "zipper": 22082, - "##bas": 22083, - "##nea": 22084, - "contrasted": 22085, - "##ply": 22086, - "##udes": 22087, - "plum": 22088, - "pharaoh": 22089, - "##pile": 22090, - "aw": 22091, - "comedies": 22092, - "ingrid": 22093, - "sandwiches": 22094, - "subdivisions": 22095, - "1100": 22096, - "mariana": 22097, - "nokia": 22098, - "kamen": 22099, - "hz": 22100, - "delaney": 22101, - "veto": 22102, - 
"herring": 22103, - "##words": 22104, - "possessive": 22105, - "outlines": 22106, - "##roup": 22107, - "siemens": 22108, - "stairwell": 22109, - "rc": 22110, - "gallantry": 22111, - "messiah": 22112, - "palais": 22113, - "yells": 22114, - "233": 22115, - "zeppelin": 22116, - "##dm": 22117, - "bolivar": 22118, - "##cede": 22119, - "smackdown": 22120, - "mckinley": 22121, - "##mora": 22122, - "##yt": 22123, - "muted": 22124, - "geologic": 22125, - "finely": 22126, - "unitary": 22127, - "avatar": 22128, - "hamas": 22129, - "maynard": 22130, - "rees": 22131, - "bog": 22132, - "contrasting": 22133, - "##rut": 22134, - "liv": 22135, - "chico": 22136, - "disposition": 22137, - "pixel": 22138, - "##erate": 22139, - "becca": 22140, - "dmitry": 22141, - "yeshiva": 22142, - "narratives": 22143, - "##lva": 22144, - "##ulton": 22145, - "mercenary": 22146, - "sharpe": 22147, - "tempered": 22148, - "navigate": 22149, - "stealth": 22150, - "amassed": 22151, - "keynes": 22152, - "##lini": 22153, - "untouched": 22154, - "##rrie": 22155, - "havoc": 22156, - "lithium": 22157, - "##fighting": 22158, - "abyss": 22159, - "graf": 22160, - "southward": 22161, - "wolverine": 22162, - "balloons": 22163, - "implements": 22164, - "ngos": 22165, - "transitions": 22166, - "##icum": 22167, - "ambushed": 22168, - "concacaf": 22169, - "dormant": 22170, - "economists": 22171, - "##dim": 22172, - "costing": 22173, - "csi": 22174, - "rana": 22175, - "universite": 22176, - "boulders": 22177, - "verity": 22178, - "##llon": 22179, - "collin": 22180, - "mellon": 22181, - "misses": 22182, - "cypress": 22183, - "fluorescent": 22184, - "lifeless": 22185, - "spence": 22186, - "##ulla": 22187, - "crewe": 22188, - "shepard": 22189, - "pak": 22190, - "revelations": 22191, - "##م": 22192, - "jolly": 22193, - "gibbons": 22194, - "paw": 22195, - "##dro": 22196, - "##quel": 22197, - "freeing": 22198, - "##test": 22199, - "shack": 22200, - "fries": 22201, - "palatine": 22202, - "##51": 22203, - "##hiko": 22204, - "accompaniment": 22205, - "cruising": 22206, - "recycled": 22207, - "##aver": 22208, - "erwin": 22209, - "sorting": 22210, - "synthesizers": 22211, - "dyke": 22212, - "realities": 22213, - "sg": 22214, - "strides": 22215, - "enslaved": 22216, - "wetland": 22217, - "##ghan": 22218, - "competence": 22219, - "gunpowder": 22220, - "grassy": 22221, - "maroon": 22222, - "reactors": 22223, - "objection": 22224, - "##oms": 22225, - "carlson": 22226, - "gearbox": 22227, - "macintosh": 22228, - "radios": 22229, - "shelton": 22230, - "##sho": 22231, - "clergyman": 22232, - "prakash": 22233, - "254": 22234, - "mongols": 22235, - "trophies": 22236, - "oricon": 22237, - "228": 22238, - "stimuli": 22239, - "twenty20": 22240, - "cantonese": 22241, - "cortes": 22242, - "mirrored": 22243, - "##saurus": 22244, - "bhp": 22245, - "cristina": 22246, - "melancholy": 22247, - "##lating": 22248, - "enjoyable": 22249, - "nuevo": 22250, - "##wny": 22251, - "downfall": 22252, - "schumacher": 22253, - "##ind": 22254, - "banging": 22255, - "lausanne": 22256, - "rumbled": 22257, - "paramilitary": 22258, - "reflex": 22259, - "ax": 22260, - "amplitude": 22261, - "migratory": 22262, - "##gall": 22263, - "##ups": 22264, - "midi": 22265, - "barnard": 22266, - "lastly": 22267, - "sherry": 22268, - "##hp": 22269, - "##nall": 22270, - "keystone": 22271, - "##kra": 22272, - "carleton": 22273, - "slippery": 22274, - "##53": 22275, - "coloring": 22276, - "foe": 22277, - "socket": 22278, - "otter": 22279, - "##rgos": 22280, - "mats": 22281, - "##tose": 22282, - 
"consultants": 22283, - "bafta": 22284, - "bison": 22285, - "topping": 22286, - "##km": 22287, - "490": 22288, - "primal": 22289, - "abandonment": 22290, - "transplant": 22291, - "atoll": 22292, - "hideous": 22293, - "mort": 22294, - "pained": 22295, - "reproduced": 22296, - "tae": 22297, - "howling": 22298, - "##turn": 22299, - "unlawful": 22300, - "billionaire": 22301, - "hotter": 22302, - "poised": 22303, - "lansing": 22304, - "##chang": 22305, - "dinamo": 22306, - "retro": 22307, - "messing": 22308, - "nfc": 22309, - "domesday": 22310, - "##mina": 22311, - "blitz": 22312, - "timed": 22313, - "##athing": 22314, - "##kley": 22315, - "ascending": 22316, - "gesturing": 22317, - "##izations": 22318, - "signaled": 22319, - "tis": 22320, - "chinatown": 22321, - "mermaid": 22322, - "savanna": 22323, - "jameson": 22324, - "##aint": 22325, - "catalina": 22326, - "##pet": 22327, - "##hers": 22328, - "cochrane": 22329, - "cy": 22330, - "chatting": 22331, - "##kus": 22332, - "alerted": 22333, - "computation": 22334, - "mused": 22335, - "noelle": 22336, - "majestic": 22337, - "mohawk": 22338, - "campo": 22339, - "octagonal": 22340, - "##sant": 22341, - "##hend": 22342, - "241": 22343, - "aspiring": 22344, - "##mart": 22345, - "comprehend": 22346, - "iona": 22347, - "paralyzed": 22348, - "shimmering": 22349, - "swindon": 22350, - "rhone": 22351, - "##eley": 22352, - "reputed": 22353, - "configurations": 22354, - "pitchfork": 22355, - "agitation": 22356, - "francais": 22357, - "gillian": 22358, - "lipstick": 22359, - "##ilo": 22360, - "outsiders": 22361, - "pontifical": 22362, - "resisting": 22363, - "bitterness": 22364, - "sewer": 22365, - "rockies": 22366, - "##edd": 22367, - "##ucher": 22368, - "misleading": 22369, - "1756": 22370, - "exiting": 22371, - "galloway": 22372, - "##nging": 22373, - "risked": 22374, - "##heart": 22375, - "246": 22376, - "commemoration": 22377, - "schultz": 22378, - "##rka": 22379, - "integrating": 22380, - "##rsa": 22381, - "poses": 22382, - "shrieked": 22383, - "##weiler": 22384, - "guineas": 22385, - "gladys": 22386, - "jerking": 22387, - "owls": 22388, - "goldsmith": 22389, - "nightly": 22390, - "penetrating": 22391, - "##unced": 22392, - "lia": 22393, - "##33": 22394, - "ignited": 22395, - "betsy": 22396, - "##aring": 22397, - "##thorpe": 22398, - "follower": 22399, - "vigorously": 22400, - "##rave": 22401, - "coded": 22402, - "kiran": 22403, - "knit": 22404, - "zoology": 22405, - "tbilisi": 22406, - "##28": 22407, - "##bered": 22408, - "repository": 22409, - "govt": 22410, - "deciduous": 22411, - "dino": 22412, - "growling": 22413, - "##bba": 22414, - "enhancement": 22415, - "unleashed": 22416, - "chanting": 22417, - "pussy": 22418, - "biochemistry": 22419, - "##eric": 22420, - "kettle": 22421, - "repression": 22422, - "toxicity": 22423, - "nrhp": 22424, - "##arth": 22425, - "##kko": 22426, - "##bush": 22427, - "ernesto": 22428, - "commended": 22429, - "outspoken": 22430, - "242": 22431, - "mca": 22432, - "parchment": 22433, - "sms": 22434, - "kristen": 22435, - "##aton": 22436, - "bisexual": 22437, - "raked": 22438, - "glamour": 22439, - "navajo": 22440, - "a2": 22441, - "conditioned": 22442, - "showcased": 22443, - "##hma": 22444, - "spacious": 22445, - "youthful": 22446, - "##esa": 22447, - "usl": 22448, - "appliances": 22449, - "junta": 22450, - "brest": 22451, - "layne": 22452, - "conglomerate": 22453, - "enchanted": 22454, - "chao": 22455, - "loosened": 22456, - "picasso": 22457, - "circulating": 22458, - "inspect": 22459, - "montevideo": 22460, - "##centric": 
22461, - "##kti": 22462, - "piazza": 22463, - "spurred": 22464, - "##aith": 22465, - "bari": 22466, - "freedoms": 22467, - "poultry": 22468, - "stamford": 22469, - "lieu": 22470, - "##ect": 22471, - "indigo": 22472, - "sarcastic": 22473, - "bahia": 22474, - "stump": 22475, - "attach": 22476, - "dvds": 22477, - "frankenstein": 22478, - "lille": 22479, - "approx": 22480, - "scriptures": 22481, - "pollen": 22482, - "##script": 22483, - "nmi": 22484, - "overseen": 22485, - "##ivism": 22486, - "tides": 22487, - "proponent": 22488, - "newmarket": 22489, - "inherit": 22490, - "milling": 22491, - "##erland": 22492, - "centralized": 22493, - "##rou": 22494, - "distributors": 22495, - "credentials": 22496, - "drawers": 22497, - "abbreviation": 22498, - "##lco": 22499, - "##xon": 22500, - "downing": 22501, - "uncomfortably": 22502, - "ripe": 22503, - "##oes": 22504, - "erase": 22505, - "franchises": 22506, - "##ever": 22507, - "populace": 22508, - "##bery": 22509, - "##khar": 22510, - "decomposition": 22511, - "pleas": 22512, - "##tet": 22513, - "daryl": 22514, - "sabah": 22515, - "##stle": 22516, - "##wide": 22517, - "fearless": 22518, - "genie": 22519, - "lesions": 22520, - "annette": 22521, - "##ogist": 22522, - "oboe": 22523, - "appendix": 22524, - "nair": 22525, - "dripped": 22526, - "petitioned": 22527, - "maclean": 22528, - "mosquito": 22529, - "parrot": 22530, - "rpg": 22531, - "hampered": 22532, - "1648": 22533, - "operatic": 22534, - "reservoirs": 22535, - "##tham": 22536, - "irrelevant": 22537, - "jolt": 22538, - "summarized": 22539, - "##fp": 22540, - "medallion": 22541, - "##taff": 22542, - "##−": 22543, - "clawed": 22544, - "harlow": 22545, - "narrower": 22546, - "goddard": 22547, - "marcia": 22548, - "bodied": 22549, - "fremont": 22550, - "suarez": 22551, - "altering": 22552, - "tempest": 22553, - "mussolini": 22554, - "porn": 22555, - "##isms": 22556, - "sweetly": 22557, - "oversees": 22558, - "walkers": 22559, - "solitude": 22560, - "grimly": 22561, - "shrines": 22562, - "hk": 22563, - "ich": 22564, - "supervisors": 22565, - "hostess": 22566, - "dietrich": 22567, - "legitimacy": 22568, - "brushes": 22569, - "expressive": 22570, - "##yp": 22571, - "dissipated": 22572, - "##rse": 22573, - "localized": 22574, - "systemic": 22575, - "##nikov": 22576, - "gettysburg": 22577, - "##js": 22578, - "##uaries": 22579, - "dialogues": 22580, - "muttering": 22581, - "251": 22582, - "housekeeper": 22583, - "sicilian": 22584, - "discouraged": 22585, - "##frey": 22586, - "beamed": 22587, - "kaladin": 22588, - "halftime": 22589, - "kidnap": 22590, - "##amo": 22591, - "##llet": 22592, - "1754": 22593, - "synonymous": 22594, - "depleted": 22595, - "instituto": 22596, - "insulin": 22597, - "reprised": 22598, - "##opsis": 22599, - "clashed": 22600, - "##ctric": 22601, - "interrupting": 22602, - "radcliffe": 22603, - "insisting": 22604, - "medici": 22605, - "1715": 22606, - "ejected": 22607, - "playfully": 22608, - "turbulent": 22609, - "##47": 22610, - "starvation": 22611, - "##rini": 22612, - "shipment": 22613, - "rebellious": 22614, - "petersen": 22615, - "verification": 22616, - "merits": 22617, - "##rified": 22618, - "cakes": 22619, - "##charged": 22620, - "1757": 22621, - "milford": 22622, - "shortages": 22623, - "spying": 22624, - "fidelity": 22625, - "##aker": 22626, - "emitted": 22627, - "storylines": 22628, - "harvested": 22629, - "seismic": 22630, - "##iform": 22631, - "cheung": 22632, - "kilda": 22633, - "theoretically": 22634, - "barbie": 22635, - "lynx": 22636, - "##rgy": 22637, - "##tius": 
22638, - "goblin": 22639, - "mata": 22640, - "poisonous": 22641, - "##nburg": 22642, - "reactive": 22643, - "residues": 22644, - "obedience": 22645, - "##евич": 22646, - "conjecture": 22647, - "##rac": 22648, - "401": 22649, - "hating": 22650, - "sixties": 22651, - "kicker": 22652, - "moaning": 22653, - "motown": 22654, - "##bha": 22655, - "emancipation": 22656, - "neoclassical": 22657, - "##hering": 22658, - "consoles": 22659, - "ebert": 22660, - "professorship": 22661, - "##tures": 22662, - "sustaining": 22663, - "assaults": 22664, - "obeyed": 22665, - "affluent": 22666, - "incurred": 22667, - "tornadoes": 22668, - "##eber": 22669, - "##zow": 22670, - "emphasizing": 22671, - "highlanders": 22672, - "cheated": 22673, - "helmets": 22674, - "##ctus": 22675, - "internship": 22676, - "terence": 22677, - "bony": 22678, - "executions": 22679, - "legislators": 22680, - "berries": 22681, - "peninsular": 22682, - "tinged": 22683, - "##aco": 22684, - "1689": 22685, - "amplifier": 22686, - "corvette": 22687, - "ribbons": 22688, - "lavish": 22689, - "pennant": 22690, - "##lander": 22691, - "worthless": 22692, - "##chfield": 22693, - "##forms": 22694, - "mariano": 22695, - "pyrenees": 22696, - "expenditures": 22697, - "##icides": 22698, - "chesterfield": 22699, - "mandir": 22700, - "tailor": 22701, - "39th": 22702, - "sergey": 22703, - "nestled": 22704, - "willed": 22705, - "aristocracy": 22706, - "devotees": 22707, - "goodnight": 22708, - "raaf": 22709, - "rumored": 22710, - "weaponry": 22711, - "remy": 22712, - "appropriations": 22713, - "harcourt": 22714, - "burr": 22715, - "riaa": 22716, - "##lence": 22717, - "limitation": 22718, - "unnoticed": 22719, - "guo": 22720, - "soaking": 22721, - "swamps": 22722, - "##tica": 22723, - "collapsing": 22724, - "tatiana": 22725, - "descriptive": 22726, - "brigham": 22727, - "psalm": 22728, - "##chment": 22729, - "maddox": 22730, - "##lization": 22731, - "patti": 22732, - "caliph": 22733, - "##aja": 22734, - "akron": 22735, - "injuring": 22736, - "serra": 22737, - "##ganj": 22738, - "basins": 22739, - "##sari": 22740, - "astonished": 22741, - "launcher": 22742, - "##church": 22743, - "hilary": 22744, - "wilkins": 22745, - "sewing": 22746, - "##sf": 22747, - "stinging": 22748, - "##fia": 22749, - "##ncia": 22750, - "underwood": 22751, - "startup": 22752, - "##ition": 22753, - "compilations": 22754, - "vibrations": 22755, - "embankment": 22756, - "jurist": 22757, - "##nity": 22758, - "bard": 22759, - "juventus": 22760, - "groundwater": 22761, - "kern": 22762, - "palaces": 22763, - "helium": 22764, - "boca": 22765, - "cramped": 22766, - "marissa": 22767, - "soto": 22768, - "##worm": 22769, - "jae": 22770, - "princely": 22771, - "##ggy": 22772, - "faso": 22773, - "bazaar": 22774, - "warmly": 22775, - "##voking": 22776, - "229": 22777, - "pairing": 22778, - "##lite": 22779, - "##grate": 22780, - "##nets": 22781, - "wien": 22782, - "freaked": 22783, - "ulysses": 22784, - "rebirth": 22785, - "##alia": 22786, - "##rent": 22787, - "mummy": 22788, - "guzman": 22789, - "jimenez": 22790, - "stilled": 22791, - "##nitz": 22792, - "trajectory": 22793, - "tha": 22794, - "woken": 22795, - "archival": 22796, - "professions": 22797, - "##pts": 22798, - "##pta": 22799, - "hilly": 22800, - "shadowy": 22801, - "shrink": 22802, - "##bolt": 22803, - "norwood": 22804, - "glued": 22805, - "migrate": 22806, - "stereotypes": 22807, - "devoid": 22808, - "##pheus": 22809, - "625": 22810, - "evacuate": 22811, - "horrors": 22812, - "infancy": 22813, - "gotham": 22814, - "knowles": 22815, - 
"optic": 22816, - "downloaded": 22817, - "sachs": 22818, - "kingsley": 22819, - "parramatta": 22820, - "darryl": 22821, - "mor": 22822, - "##onale": 22823, - "shady": 22824, - "commence": 22825, - "confesses": 22826, - "kan": 22827, - "##meter": 22828, - "##placed": 22829, - "marlborough": 22830, - "roundabout": 22831, - "regents": 22832, - "frigates": 22833, - "io": 22834, - "##imating": 22835, - "gothenburg": 22836, - "revoked": 22837, - "carvings": 22838, - "clockwise": 22839, - "convertible": 22840, - "intruder": 22841, - "##sche": 22842, - "banged": 22843, - "##ogo": 22844, - "vicky": 22845, - "bourgeois": 22846, - "##mony": 22847, - "dupont": 22848, - "footing": 22849, - "##gum": 22850, - "pd": 22851, - "##real": 22852, - "buckle": 22853, - "yun": 22854, - "penthouse": 22855, - "sane": 22856, - "720": 22857, - "serviced": 22858, - "stakeholders": 22859, - "neumann": 22860, - "bb": 22861, - "##eers": 22862, - "comb": 22863, - "##gam": 22864, - "catchment": 22865, - "pinning": 22866, - "rallies": 22867, - "typing": 22868, - "##elles": 22869, - "forefront": 22870, - "freiburg": 22871, - "sweetie": 22872, - "giacomo": 22873, - "widowed": 22874, - "goodwill": 22875, - "worshipped": 22876, - "aspirations": 22877, - "midday": 22878, - "##vat": 22879, - "fishery": 22880, - "##trick": 22881, - "bournemouth": 22882, - "turk": 22883, - "243": 22884, - "hearth": 22885, - "ethanol": 22886, - "guadalajara": 22887, - "murmurs": 22888, - "sl": 22889, - "##uge": 22890, - "afforded": 22891, - "scripted": 22892, - "##hta": 22893, - "wah": 22894, - "##jn": 22895, - "coroner": 22896, - "translucent": 22897, - "252": 22898, - "memorials": 22899, - "puck": 22900, - "progresses": 22901, - "clumsy": 22902, - "##race": 22903, - "315": 22904, - "candace": 22905, - "recounted": 22906, - "##27": 22907, - "##slin": 22908, - "##uve": 22909, - "filtering": 22910, - "##mac": 22911, - "howl": 22912, - "strata": 22913, - "heron": 22914, - "leveled": 22915, - "##ays": 22916, - "dubious": 22917, - "##oja": 22918, - "##т": 22919, - "##wheel": 22920, - "citations": 22921, - "exhibiting": 22922, - "##laya": 22923, - "##mics": 22924, - "##pods": 22925, - "turkic": 22926, - "##lberg": 22927, - "injunction": 22928, - "##ennial": 22929, - "##mit": 22930, - "antibodies": 22931, - "##44": 22932, - "organise": 22933, - "##rigues": 22934, - "cardiovascular": 22935, - "cushion": 22936, - "inverness": 22937, - "##zquez": 22938, - "dia": 22939, - "cocoa": 22940, - "sibling": 22941, - "##tman": 22942, - "##roid": 22943, - "expanse": 22944, - "feasible": 22945, - "tunisian": 22946, - "algiers": 22947, - "##relli": 22948, - "rus": 22949, - "bloomberg": 22950, - "dso": 22951, - "westphalia": 22952, - "bro": 22953, - "tacoma": 22954, - "281": 22955, - "downloads": 22956, - "##ours": 22957, - "konrad": 22958, - "duran": 22959, - "##hdi": 22960, - "continuum": 22961, - "jett": 22962, - "compares": 22963, - "legislator": 22964, - "secession": 22965, - "##nable": 22966, - "##gues": 22967, - "##zuka": 22968, - "translating": 22969, - "reacher": 22970, - "##gley": 22971, - "##ła": 22972, - "aleppo": 22973, - "##agi": 22974, - "tc": 22975, - "orchards": 22976, - "trapping": 22977, - "linguist": 22978, - "versatile": 22979, - "drumming": 22980, - "postage": 22981, - "calhoun": 22982, - "superiors": 22983, - "##mx": 22984, - "barefoot": 22985, - "leary": 22986, - "##cis": 22987, - "ignacio": 22988, - "alfa": 22989, - "kaplan": 22990, - "##rogen": 22991, - "bratislava": 22992, - "mori": 22993, - "##vot": 22994, - "disturb": 22995, - "haas": 22996, 
- "313": 22997, - "cartridges": 22998, - "gilmore": 22999, - "radiated": 23000, - "salford": 23001, - "tunic": 23002, - "hades": 23003, - "##ulsive": 23004, - "archeological": 23005, - "delilah": 23006, - "magistrates": 23007, - "auditioned": 23008, - "brewster": 23009, - "charters": 23010, - "empowerment": 23011, - "blogs": 23012, - "cappella": 23013, - "dynasties": 23014, - "iroquois": 23015, - "whipping": 23016, - "##krishna": 23017, - "raceway": 23018, - "truths": 23019, - "myra": 23020, - "weaken": 23021, - "judah": 23022, - "mcgregor": 23023, - "##horse": 23024, - "mic": 23025, - "refueling": 23026, - "37th": 23027, - "burnley": 23028, - "bosses": 23029, - "markus": 23030, - "premio": 23031, - "query": 23032, - "##gga": 23033, - "dunbar": 23034, - "##economic": 23035, - "darkest": 23036, - "lyndon": 23037, - "sealing": 23038, - "commendation": 23039, - "reappeared": 23040, - "##mun": 23041, - "addicted": 23042, - "ezio": 23043, - "slaughtered": 23044, - "satisfactory": 23045, - "shuffle": 23046, - "##eves": 23047, - "##thic": 23048, - "##uj": 23049, - "fortification": 23050, - "warrington": 23051, - "##otto": 23052, - "resurrected": 23053, - "fargo": 23054, - "mane": 23055, - "##utable": 23056, - "##lei": 23057, - "##space": 23058, - "foreword": 23059, - "ox": 23060, - "##aris": 23061, - "##vern": 23062, - "abrams": 23063, - "hua": 23064, - "##mento": 23065, - "sakura": 23066, - "##alo": 23067, - "uv": 23068, - "sentimental": 23069, - "##skaya": 23070, - "midfield": 23071, - "##eses": 23072, - "sturdy": 23073, - "scrolls": 23074, - "macleod": 23075, - "##kyu": 23076, - "entropy": 23077, - "##lance": 23078, - "mitochondrial": 23079, - "cicero": 23080, - "excelled": 23081, - "thinner": 23082, - "convoys": 23083, - "perceive": 23084, - "##oslav": 23085, - "##urable": 23086, - "systematically": 23087, - "grind": 23088, - "burkina": 23089, - "287": 23090, - "##tagram": 23091, - "ops": 23092, - "##aman": 23093, - "guantanamo": 23094, - "##cloth": 23095, - "##tite": 23096, - "forcefully": 23097, - "wavy": 23098, - "##jou": 23099, - "pointless": 23100, - "##linger": 23101, - "##tze": 23102, - "layton": 23103, - "portico": 23104, - "superficial": 23105, - "clerical": 23106, - "outlaws": 23107, - "##hism": 23108, - "burials": 23109, - "muir": 23110, - "##inn": 23111, - "creditors": 23112, - "hauling": 23113, - "rattle": 23114, - "##leg": 23115, - "calais": 23116, - "monde": 23117, - "archers": 23118, - "reclaimed": 23119, - "dwell": 23120, - "wexford": 23121, - "hellenic": 23122, - "falsely": 23123, - "remorse": 23124, - "##tek": 23125, - "dough": 23126, - "furnishings": 23127, - "##uttered": 23128, - "gabon": 23129, - "neurological": 23130, - "novice": 23131, - "##igraphy": 23132, - "contemplated": 23133, - "pulpit": 23134, - "nightstand": 23135, - "saratoga": 23136, - "##istan": 23137, - "documenting": 23138, - "pulsing": 23139, - "taluk": 23140, - "##firmed": 23141, - "busted": 23142, - "marital": 23143, - "##rien": 23144, - "disagreements": 23145, - "wasps": 23146, - "##yes": 23147, - "hodge": 23148, - "mcdonnell": 23149, - "mimic": 23150, - "fran": 23151, - "pendant": 23152, - "dhabi": 23153, - "musa": 23154, - "##nington": 23155, - "congratulations": 23156, - "argent": 23157, - "darrell": 23158, - "concussion": 23159, - "losers": 23160, - "regrets": 23161, - "thessaloniki": 23162, - "reversal": 23163, - "donaldson": 23164, - "hardwood": 23165, - "thence": 23166, - "achilles": 23167, - "ritter": 23168, - "##eran": 23169, - "demonic": 23170, - "jurgen": 23171, - "prophets": 23172, - 
"goethe": 23173, - "eki": 23174, - "classmate": 23175, - "buff": 23176, - "##cking": 23177, - "yank": 23178, - "irrational": 23179, - "##inging": 23180, - "perished": 23181, - "seductive": 23182, - "qur": 23183, - "sourced": 23184, - "##crat": 23185, - "##typic": 23186, - "mustard": 23187, - "ravine": 23188, - "barre": 23189, - "horizontally": 23190, - "characterization": 23191, - "phylogenetic": 23192, - "boise": 23193, - "##dit": 23194, - "##runner": 23195, - "##tower": 23196, - "brutally": 23197, - "intercourse": 23198, - "seduce": 23199, - "##bbing": 23200, - "fay": 23201, - "ferris": 23202, - "ogden": 23203, - "amar": 23204, - "nik": 23205, - "unarmed": 23206, - "##inator": 23207, - "evaluating": 23208, - "kyrgyzstan": 23209, - "sweetness": 23210, - "##lford": 23211, - "##oki": 23212, - "mccormick": 23213, - "meiji": 23214, - "notoriety": 23215, - "stimulate": 23216, - "disrupt": 23217, - "figuring": 23218, - "instructional": 23219, - "mcgrath": 23220, - "##zoo": 23221, - "groundbreaking": 23222, - "##lto": 23223, - "flinch": 23224, - "khorasan": 23225, - "agrarian": 23226, - "bengals": 23227, - "mixer": 23228, - "radiating": 23229, - "##sov": 23230, - "ingram": 23231, - "pitchers": 23232, - "nad": 23233, - "tariff": 23234, - "##cript": 23235, - "tata": 23236, - "##codes": 23237, - "##emi": 23238, - "##ungen": 23239, - "appellate": 23240, - "lehigh": 23241, - "##bled": 23242, - "##giri": 23243, - "brawl": 23244, - "duct": 23245, - "texans": 23246, - "##ciation": 23247, - "##ropolis": 23248, - "skipper": 23249, - "speculative": 23250, - "vomit": 23251, - "doctrines": 23252, - "stresses": 23253, - "253": 23254, - "davy": 23255, - "graders": 23256, - "whitehead": 23257, - "jozef": 23258, - "timely": 23259, - "cumulative": 23260, - "haryana": 23261, - "paints": 23262, - "appropriately": 23263, - "boon": 23264, - "cactus": 23265, - "##ales": 23266, - "##pid": 23267, - "dow": 23268, - "legions": 23269, - "##pit": 23270, - "perceptions": 23271, - "1730": 23272, - "picturesque": 23273, - "##yse": 23274, - "periphery": 23275, - "rune": 23276, - "wr": 23277, - "##aha": 23278, - "celtics": 23279, - "sentencing": 23280, - "whoa": 23281, - "##erin": 23282, - "confirms": 23283, - "variance": 23284, - "425": 23285, - "moines": 23286, - "mathews": 23287, - "spade": 23288, - "rave": 23289, - "m1": 23290, - "fronted": 23291, - "fx": 23292, - "blending": 23293, - "alleging": 23294, - "reared": 23295, - "##gl": 23296, - "237": 23297, - "##paper": 23298, - "grassroots": 23299, - "eroded": 23300, - "##free": 23301, - "##physical": 23302, - "directs": 23303, - "ordeal": 23304, - "##sław": 23305, - "accelerate": 23306, - "hacker": 23307, - "rooftop": 23308, - "##inia": 23309, - "lev": 23310, - "buys": 23311, - "cebu": 23312, - "devote": 23313, - "##lce": 23314, - "specialising": 23315, - "##ulsion": 23316, - "choreographed": 23317, - "repetition": 23318, - "warehouses": 23319, - "##ryl": 23320, - "paisley": 23321, - "tuscany": 23322, - "analogy": 23323, - "sorcerer": 23324, - "hash": 23325, - "huts": 23326, - "shards": 23327, - "descends": 23328, - "exclude": 23329, - "nix": 23330, - "chaplin": 23331, - "gaga": 23332, - "ito": 23333, - "vane": 23334, - "##drich": 23335, - "causeway": 23336, - "misconduct": 23337, - "limo": 23338, - "orchestrated": 23339, - "glands": 23340, - "jana": 23341, - "##kot": 23342, - "u2": 23343, - "##mple": 23344, - "##sons": 23345, - "branching": 23346, - "contrasts": 23347, - "scoop": 23348, - "longed": 23349, - "##virus": 23350, - "chattanooga": 23351, - "##75": 23352, - 
"syrup": 23353, - "cornerstone": 23354, - "##tized": 23355, - "##mind": 23356, - "##iaceae": 23357, - "careless": 23358, - "precedence": 23359, - "frescoes": 23360, - "##uet": 23361, - "chilled": 23362, - "consult": 23363, - "modelled": 23364, - "snatch": 23365, - "peat": 23366, - "##thermal": 23367, - "caucasian": 23368, - "humane": 23369, - "relaxation": 23370, - "spins": 23371, - "temperance": 23372, - "##lbert": 23373, - "occupations": 23374, - "lambda": 23375, - "hybrids": 23376, - "moons": 23377, - "mp3": 23378, - "##oese": 23379, - "247": 23380, - "rolf": 23381, - "societal": 23382, - "yerevan": 23383, - "ness": 23384, - "##ssler": 23385, - "befriended": 23386, - "mechanized": 23387, - "nominate": 23388, - "trough": 23389, - "boasted": 23390, - "cues": 23391, - "seater": 23392, - "##hom": 23393, - "bends": 23394, - "##tangle": 23395, - "conductors": 23396, - "emptiness": 23397, - "##lmer": 23398, - "eurasian": 23399, - "adriatic": 23400, - "tian": 23401, - "##cie": 23402, - "anxiously": 23403, - "lark": 23404, - "propellers": 23405, - "chichester": 23406, - "jock": 23407, - "ev": 23408, - "2a": 23409, - "##holding": 23410, - "credible": 23411, - "recounts": 23412, - "tori": 23413, - "loyalist": 23414, - "abduction": 23415, - "##hoot": 23416, - "##redo": 23417, - "nepali": 23418, - "##mite": 23419, - "ventral": 23420, - "tempting": 23421, - "##ango": 23422, - "##crats": 23423, - "steered": 23424, - "##wice": 23425, - "javelin": 23426, - "dipping": 23427, - "laborers": 23428, - "prentice": 23429, - "looming": 23430, - "titanium": 23431, - "##ː": 23432, - "badges": 23433, - "emir": 23434, - "tensor": 23435, - "##ntation": 23436, - "egyptians": 23437, - "rash": 23438, - "denies": 23439, - "hawthorne": 23440, - "lombard": 23441, - "showers": 23442, - "wehrmacht": 23443, - "dietary": 23444, - "trojan": 23445, - "##reus": 23446, - "welles": 23447, - "executing": 23448, - "horseshoe": 23449, - "lifeboat": 23450, - "##lak": 23451, - "elsa": 23452, - "infirmary": 23453, - "nearing": 23454, - "roberta": 23455, - "boyer": 23456, - "mutter": 23457, - "trillion": 23458, - "joanne": 23459, - "##fine": 23460, - "##oked": 23461, - "sinks": 23462, - "vortex": 23463, - "uruguayan": 23464, - "clasp": 23465, - "sirius": 23466, - "##block": 23467, - "accelerator": 23468, - "prohibit": 23469, - "sunken": 23470, - "byu": 23471, - "chronological": 23472, - "diplomats": 23473, - "ochreous": 23474, - "510": 23475, - "symmetrical": 23476, - "1644": 23477, - "maia": 23478, - "##tology": 23479, - "salts": 23480, - "reigns": 23481, - "atrocities": 23482, - "##ия": 23483, - "hess": 23484, - "bared": 23485, - "issn": 23486, - "##vyn": 23487, - "cater": 23488, - "saturated": 23489, - "##cycle": 23490, - "##isse": 23491, - "sable": 23492, - "voyager": 23493, - "dyer": 23494, - "yusuf": 23495, - "##inge": 23496, - "fountains": 23497, - "wolff": 23498, - "##39": 23499, - "##nni": 23500, - "engraving": 23501, - "rollins": 23502, - "atheist": 23503, - "ominous": 23504, - "##ault": 23505, - "herr": 23506, - "chariot": 23507, - "martina": 23508, - "strung": 23509, - "##fell": 23510, - "##farlane": 23511, - "horrific": 23512, - "sahib": 23513, - "gazes": 23514, - "saetan": 23515, - "erased": 23516, - "ptolemy": 23517, - "##olic": 23518, - "flushing": 23519, - "lauderdale": 23520, - "analytic": 23521, - "##ices": 23522, - "530": 23523, - "navarro": 23524, - "beak": 23525, - "gorilla": 23526, - "herrera": 23527, - "broom": 23528, - "guadalupe": 23529, - "raiding": 23530, - "sykes": 23531, - "311": 23532, - "bsc": 23533, - 
"deliveries": 23534, - "1720": 23535, - "invasions": 23536, - "carmichael": 23537, - "tajikistan": 23538, - "thematic": 23539, - "ecumenical": 23540, - "sentiments": 23541, - "onstage": 23542, - "##rians": 23543, - "##brand": 23544, - "##sume": 23545, - "catastrophic": 23546, - "flanks": 23547, - "molten": 23548, - "##arns": 23549, - "waller": 23550, - "aimee": 23551, - "terminating": 23552, - "##icing": 23553, - "alternately": 23554, - "##oche": 23555, - "nehru": 23556, - "printers": 23557, - "outraged": 23558, - "##eving": 23559, - "empires": 23560, - "template": 23561, - "banners": 23562, - "repetitive": 23563, - "za": 23564, - "##oise": 23565, - "vegetarian": 23566, - "##tell": 23567, - "guiana": 23568, - "opt": 23569, - "cavendish": 23570, - "lucknow": 23571, - "synthesized": 23572, - "##hani": 23573, - "##mada": 23574, - "finalized": 23575, - "##ctable": 23576, - "fictitious": 23577, - "mayoral": 23578, - "unreliable": 23579, - "##enham": 23580, - "embracing": 23581, - "peppers": 23582, - "rbis": 23583, - "##chio": 23584, - "##neo": 23585, - "inhibition": 23586, - "slashed": 23587, - "togo": 23588, - "orderly": 23589, - "embroidered": 23590, - "safari": 23591, - "salty": 23592, - "236": 23593, - "barron": 23594, - "benito": 23595, - "totaled": 23596, - "##dak": 23597, - "pubs": 23598, - "simulated": 23599, - "caden": 23600, - "devin": 23601, - "tolkien": 23602, - "momma": 23603, - "welding": 23604, - "sesame": 23605, - "##ept": 23606, - "gottingen": 23607, - "hardness": 23608, - "630": 23609, - "shaman": 23610, - "temeraire": 23611, - "620": 23612, - "adequately": 23613, - "pediatric": 23614, - "##kit": 23615, - "ck": 23616, - "assertion": 23617, - "radicals": 23618, - "composure": 23619, - "cadence": 23620, - "seafood": 23621, - "beaufort": 23622, - "lazarus": 23623, - "mani": 23624, - "warily": 23625, - "cunning": 23626, - "kurdistan": 23627, - "249": 23628, - "cantata": 23629, - "##kir": 23630, - "ares": 23631, - "##41": 23632, - "##clusive": 23633, - "nape": 23634, - "townland": 23635, - "geared": 23636, - "insulted": 23637, - "flutter": 23638, - "boating": 23639, - "violate": 23640, - "draper": 23641, - "dumping": 23642, - "malmo": 23643, - "##hh": 23644, - "##romatic": 23645, - "firearm": 23646, - "alta": 23647, - "bono": 23648, - "obscured": 23649, - "##clave": 23650, - "exceeds": 23651, - "panorama": 23652, - "unbelievable": 23653, - "##train": 23654, - "preschool": 23655, - "##essed": 23656, - "disconnected": 23657, - "installing": 23658, - "rescuing": 23659, - "secretaries": 23660, - "accessibility": 23661, - "##castle": 23662, - "##drive": 23663, - "##ifice": 23664, - "##film": 23665, - "bouts": 23666, - "slug": 23667, - "waterway": 23668, - "mindanao": 23669, - "##buro": 23670, - "##ratic": 23671, - "halves": 23672, - "##ل": 23673, - "calming": 23674, - "liter": 23675, - "maternity": 23676, - "adorable": 23677, - "bragg": 23678, - "electrification": 23679, - "mcc": 23680, - "##dote": 23681, - "roxy": 23682, - "schizophrenia": 23683, - "##body": 23684, - "munoz": 23685, - "kaye": 23686, - "whaling": 23687, - "239": 23688, - "mil": 23689, - "tingling": 23690, - "tolerant": 23691, - "##ago": 23692, - "unconventional": 23693, - "volcanoes": 23694, - "##finder": 23695, - "deportivo": 23696, - "##llie": 23697, - "robson": 23698, - "kaufman": 23699, - "neuroscience": 23700, - "wai": 23701, - "deportation": 23702, - "masovian": 23703, - "scraping": 23704, - "converse": 23705, - "##bh": 23706, - "hacking": 23707, - "bulge": 23708, - "##oun": 23709, - "administratively": 23710, - 
"yao": 23711, - "580": 23712, - "amp": 23713, - "mammoth": 23714, - "booster": 23715, - "claremont": 23716, - "hooper": 23717, - "nomenclature": 23718, - "pursuits": 23719, - "mclaughlin": 23720, - "melinda": 23721, - "##sul": 23722, - "catfish": 23723, - "barclay": 23724, - "substrates": 23725, - "taxa": 23726, - "zee": 23727, - "originals": 23728, - "kimberly": 23729, - "packets": 23730, - "padma": 23731, - "##ality": 23732, - "borrowing": 23733, - "ostensibly": 23734, - "solvent": 23735, - "##bri": 23736, - "##genesis": 23737, - "##mist": 23738, - "lukas": 23739, - "shreveport": 23740, - "veracruz": 23741, - "##ь": 23742, - "##lou": 23743, - "##wives": 23744, - "cheney": 23745, - "tt": 23746, - "anatolia": 23747, - "hobbs": 23748, - "##zyn": 23749, - "cyclic": 23750, - "radiant": 23751, - "alistair": 23752, - "greenish": 23753, - "siena": 23754, - "dat": 23755, - "independents": 23756, - "##bation": 23757, - "conform": 23758, - "pieter": 23759, - "hyper": 23760, - "applicant": 23761, - "bradshaw": 23762, - "spores": 23763, - "telangana": 23764, - "vinci": 23765, - "inexpensive": 23766, - "nuclei": 23767, - "322": 23768, - "jang": 23769, - "nme": 23770, - "soho": 23771, - "spd": 23772, - "##ign": 23773, - "cradled": 23774, - "receptionist": 23775, - "pow": 23776, - "##43": 23777, - "##rika": 23778, - "fascism": 23779, - "##ifer": 23780, - "experimenting": 23781, - "##ading": 23782, - "##iec": 23783, - "##region": 23784, - "345": 23785, - "jocelyn": 23786, - "maris": 23787, - "stair": 23788, - "nocturnal": 23789, - "toro": 23790, - "constabulary": 23791, - "elgin": 23792, - "##kker": 23793, - "msc": 23794, - "##giving": 23795, - "##schen": 23796, - "##rase": 23797, - "doherty": 23798, - "doping": 23799, - "sarcastically": 23800, - "batter": 23801, - "maneuvers": 23802, - "##cano": 23803, - "##apple": 23804, - "##gai": 23805, - "##git": 23806, - "intrinsic": 23807, - "##nst": 23808, - "##stor": 23809, - "1753": 23810, - "showtime": 23811, - "cafes": 23812, - "gasps": 23813, - "lviv": 23814, - "ushered": 23815, - "##thed": 23816, - "fours": 23817, - "restart": 23818, - "astonishment": 23819, - "transmitting": 23820, - "flyer": 23821, - "shrugs": 23822, - "##sau": 23823, - "intriguing": 23824, - "cones": 23825, - "dictated": 23826, - "mushrooms": 23827, - "medial": 23828, - "##kovsky": 23829, - "##elman": 23830, - "escorting": 23831, - "gaped": 23832, - "##26": 23833, - "godfather": 23834, - "##door": 23835, - "##sell": 23836, - "djs": 23837, - "recaptured": 23838, - "timetable": 23839, - "vila": 23840, - "1710": 23841, - "3a": 23842, - "aerodrome": 23843, - "mortals": 23844, - "scientology": 23845, - "##orne": 23846, - "angelina": 23847, - "mag": 23848, - "convection": 23849, - "unpaid": 23850, - "insertion": 23851, - "intermittent": 23852, - "lego": 23853, - "##nated": 23854, - "endeavor": 23855, - "kota": 23856, - "pereira": 23857, - "##lz": 23858, - "304": 23859, - "bwv": 23860, - "glamorgan": 23861, - "insults": 23862, - "agatha": 23863, - "fey": 23864, - "##cend": 23865, - "fleetwood": 23866, - "mahogany": 23867, - "protruding": 23868, - "steamship": 23869, - "zeta": 23870, - "##arty": 23871, - "mcguire": 23872, - "suspense": 23873, - "##sphere": 23874, - "advising": 23875, - "urges": 23876, - "##wala": 23877, - "hurriedly": 23878, - "meteor": 23879, - "gilded": 23880, - "inline": 23881, - "arroyo": 23882, - "stalker": 23883, - "##oge": 23884, - "excitedly": 23885, - "revered": 23886, - "##cure": 23887, - "earle": 23888, - "introductory": 23889, - "##break": 23890, - "##ilde": 23891, - 
"mutants": 23892, - "puff": 23893, - "pulses": 23894, - "reinforcement": 23895, - "##haling": 23896, - "curses": 23897, - "lizards": 23898, - "stalk": 23899, - "correlated": 23900, - "##fixed": 23901, - "fallout": 23902, - "macquarie": 23903, - "##unas": 23904, - "bearded": 23905, - "denton": 23906, - "heaving": 23907, - "802": 23908, - "##ocation": 23909, - "winery": 23910, - "assign": 23911, - "dortmund": 23912, - "##lkirk": 23913, - "everest": 23914, - "invariant": 23915, - "charismatic": 23916, - "susie": 23917, - "##elling": 23918, - "bled": 23919, - "lesley": 23920, - "telegram": 23921, - "sumner": 23922, - "bk": 23923, - "##ogen": 23924, - "##к": 23925, - "wilcox": 23926, - "needy": 23927, - "colbert": 23928, - "duval": 23929, - "##iferous": 23930, - "##mbled": 23931, - "allotted": 23932, - "attends": 23933, - "imperative": 23934, - "##hita": 23935, - "replacements": 23936, - "hawker": 23937, - "##inda": 23938, - "insurgency": 23939, - "##zee": 23940, - "##eke": 23941, - "casts": 23942, - "##yla": 23943, - "680": 23944, - "ives": 23945, - "transitioned": 23946, - "##pack": 23947, - "##powering": 23948, - "authoritative": 23949, - "baylor": 23950, - "flex": 23951, - "cringed": 23952, - "plaintiffs": 23953, - "woodrow": 23954, - "##skie": 23955, - "drastic": 23956, - "ape": 23957, - "aroma": 23958, - "unfolded": 23959, - "commotion": 23960, - "nt": 23961, - "preoccupied": 23962, - "theta": 23963, - "routines": 23964, - "lasers": 23965, - "privatization": 23966, - "wand": 23967, - "domino": 23968, - "ek": 23969, - "clenching": 23970, - "nsa": 23971, - "strategically": 23972, - "showered": 23973, - "bile": 23974, - "handkerchief": 23975, - "pere": 23976, - "storing": 23977, - "christophe": 23978, - "insulting": 23979, - "316": 23980, - "nakamura": 23981, - "romani": 23982, - "asiatic": 23983, - "magdalena": 23984, - "palma": 23985, - "cruises": 23986, - "stripping": 23987, - "405": 23988, - "konstantin": 23989, - "soaring": 23990, - "##berman": 23991, - "colloquially": 23992, - "forerunner": 23993, - "havilland": 23994, - "incarcerated": 23995, - "parasites": 23996, - "sincerity": 23997, - "##utus": 23998, - "disks": 23999, - "plank": 24000, - "saigon": 24001, - "##ining": 24002, - "corbin": 24003, - "homo": 24004, - "ornaments": 24005, - "powerhouse": 24006, - "##tlement": 24007, - "chong": 24008, - "fastened": 24009, - "feasibility": 24010, - "idf": 24011, - "morphological": 24012, - "usable": 24013, - "##nish": 24014, - "##zuki": 24015, - "aqueduct": 24016, - "jaguars": 24017, - "keepers": 24018, - "##flies": 24019, - "aleksandr": 24020, - "faust": 24021, - "assigns": 24022, - "ewing": 24023, - "bacterium": 24024, - "hurled": 24025, - "tricky": 24026, - "hungarians": 24027, - "integers": 24028, - "wallis": 24029, - "321": 24030, - "yamaha": 24031, - "##isha": 24032, - "hushed": 24033, - "oblivion": 24034, - "aviator": 24035, - "evangelist": 24036, - "friars": 24037, - "##eller": 24038, - "monograph": 24039, - "ode": 24040, - "##nary": 24041, - "airplanes": 24042, - "labourers": 24043, - "charms": 24044, - "##nee": 24045, - "1661": 24046, - "hagen": 24047, - "tnt": 24048, - "rudder": 24049, - "fiesta": 24050, - "transcript": 24051, - "dorothea": 24052, - "ska": 24053, - "inhibitor": 24054, - "maccabi": 24055, - "retorted": 24056, - "raining": 24057, - "encompassed": 24058, - "clauses": 24059, - "menacing": 24060, - "1642": 24061, - "lineman": 24062, - "##gist": 24063, - "vamps": 24064, - "##ape": 24065, - "##dick": 24066, - "gloom": 24067, - "##rera": 24068, - "dealings": 24069, - 
"easing": 24070, - "seekers": 24071, - "##nut": 24072, - "##pment": 24073, - "helens": 24074, - "unmanned": 24075, - "##anu": 24076, - "##isson": 24077, - "basics": 24078, - "##amy": 24079, - "##ckman": 24080, - "adjustments": 24081, - "1688": 24082, - "brutality": 24083, - "horne": 24084, - "##zell": 24085, - "sui": 24086, - "##55": 24087, - "##mable": 24088, - "aggregator": 24089, - "##thal": 24090, - "rhino": 24091, - "##drick": 24092, - "##vira": 24093, - "counters": 24094, - "zoom": 24095, - "##01": 24096, - "##rting": 24097, - "mn": 24098, - "montenegrin": 24099, - "packard": 24100, - "##unciation": 24101, - "##♭": 24102, - "##kki": 24103, - "reclaim": 24104, - "scholastic": 24105, - "thugs": 24106, - "pulsed": 24107, - "##icia": 24108, - "syriac": 24109, - "quan": 24110, - "saddam": 24111, - "banda": 24112, - "kobe": 24113, - "blaming": 24114, - "buddies": 24115, - "dissent": 24116, - "##lusion": 24117, - "##usia": 24118, - "corbett": 24119, - "jaya": 24120, - "delle": 24121, - "erratic": 24122, - "lexie": 24123, - "##hesis": 24124, - "435": 24125, - "amiga": 24126, - "hermes": 24127, - "##pressing": 24128, - "##leen": 24129, - "chapels": 24130, - "gospels": 24131, - "jamal": 24132, - "##uating": 24133, - "compute": 24134, - "revolving": 24135, - "warp": 24136, - "##sso": 24137, - "##thes": 24138, - "armory": 24139, - "##eras": 24140, - "##gol": 24141, - "antrim": 24142, - "loki": 24143, - "##kow": 24144, - "##asian": 24145, - "##good": 24146, - "##zano": 24147, - "braid": 24148, - "handwriting": 24149, - "subdistrict": 24150, - "funky": 24151, - "pantheon": 24152, - "##iculate": 24153, - "concurrency": 24154, - "estimation": 24155, - "improper": 24156, - "juliana": 24157, - "##his": 24158, - "newcomers": 24159, - "johnstone": 24160, - "staten": 24161, - "communicated": 24162, - "##oco": 24163, - "##alle": 24164, - "sausage": 24165, - "stormy": 24166, - "##stered": 24167, - "##tters": 24168, - "superfamily": 24169, - "##grade": 24170, - "acidic": 24171, - "collateral": 24172, - "tabloid": 24173, - "##oped": 24174, - "##rza": 24175, - "bladder": 24176, - "austen": 24177, - "##ellant": 24178, - "mcgraw": 24179, - "##hay": 24180, - "hannibal": 24181, - "mein": 24182, - "aquino": 24183, - "lucifer": 24184, - "wo": 24185, - "badger": 24186, - "boar": 24187, - "cher": 24188, - "christensen": 24189, - "greenberg": 24190, - "interruption": 24191, - "##kken": 24192, - "jem": 24193, - "244": 24194, - "mocked": 24195, - "bottoms": 24196, - "cambridgeshire": 24197, - "##lide": 24198, - "sprawling": 24199, - "##bbly": 24200, - "eastwood": 24201, - "ghent": 24202, - "synth": 24203, - "##buck": 24204, - "advisers": 24205, - "##bah": 24206, - "nominally": 24207, - "hapoel": 24208, - "qu": 24209, - "daggers": 24210, - "estranged": 24211, - "fabricated": 24212, - "towels": 24213, - "vinnie": 24214, - "wcw": 24215, - "misunderstanding": 24216, - "anglia": 24217, - "nothin": 24218, - "unmistakable": 24219, - "##dust": 24220, - "##lova": 24221, - "chilly": 24222, - "marquette": 24223, - "truss": 24224, - "##edge": 24225, - "##erine": 24226, - "reece": 24227, - "##lty": 24228, - "##chemist": 24229, - "##connected": 24230, - "272": 24231, - "308": 24232, - "41st": 24233, - "bash": 24234, - "raion": 24235, - "waterfalls": 24236, - "##ump": 24237, - "##main": 24238, - "labyrinth": 24239, - "queue": 24240, - "theorist": 24241, - "##istle": 24242, - "bharatiya": 24243, - "flexed": 24244, - "soundtracks": 24245, - "rooney": 24246, - "leftist": 24247, - "patrolling": 24248, - "wharton": 24249, - "plainly": 
24250, - "alleviate": 24251, - "eastman": 24252, - "schuster": 24253, - "topographic": 24254, - "engages": 24255, - "immensely": 24256, - "unbearable": 24257, - "fairchild": 24258, - "1620": 24259, - "dona": 24260, - "lurking": 24261, - "parisian": 24262, - "oliveira": 24263, - "ia": 24264, - "indictment": 24265, - "hahn": 24266, - "bangladeshi": 24267, - "##aster": 24268, - "vivo": 24269, - "##uming": 24270, - "##ential": 24271, - "antonia": 24272, - "expects": 24273, - "indoors": 24274, - "kildare": 24275, - "harlan": 24276, - "##logue": 24277, - "##ogenic": 24278, - "##sities": 24279, - "forgiven": 24280, - "##wat": 24281, - "childish": 24282, - "tavi": 24283, - "##mide": 24284, - "##orra": 24285, - "plausible": 24286, - "grimm": 24287, - "successively": 24288, - "scooted": 24289, - "##bola": 24290, - "##dget": 24291, - "##rith": 24292, - "spartans": 24293, - "emery": 24294, - "flatly": 24295, - "azure": 24296, - "epilogue": 24297, - "##wark": 24298, - "flourish": 24299, - "##iny": 24300, - "##tracted": 24301, - "##overs": 24302, - "##oshi": 24303, - "bestseller": 24304, - "distressed": 24305, - "receipt": 24306, - "spitting": 24307, - "hermit": 24308, - "topological": 24309, - "##cot": 24310, - "drilled": 24311, - "subunit": 24312, - "francs": 24313, - "##layer": 24314, - "eel": 24315, - "##fk": 24316, - "##itas": 24317, - "octopus": 24318, - "footprint": 24319, - "petitions": 24320, - "ufo": 24321, - "##say": 24322, - "##foil": 24323, - "interfering": 24324, - "leaking": 24325, - "palo": 24326, - "##metry": 24327, - "thistle": 24328, - "valiant": 24329, - "##pic": 24330, - "narayan": 24331, - "mcpherson": 24332, - "##fast": 24333, - "gonzales": 24334, - "##ym": 24335, - "##enne": 24336, - "dustin": 24337, - "novgorod": 24338, - "solos": 24339, - "##zman": 24340, - "doin": 24341, - "##raph": 24342, - "##patient": 24343, - "##meyer": 24344, - "soluble": 24345, - "ashland": 24346, - "cuffs": 24347, - "carole": 24348, - "pendleton": 24349, - "whistling": 24350, - "vassal": 24351, - "##river": 24352, - "deviation": 24353, - "revisited": 24354, - "constituents": 24355, - "rallied": 24356, - "rotate": 24357, - "loomed": 24358, - "##eil": 24359, - "##nting": 24360, - "amateurs": 24361, - "augsburg": 24362, - "auschwitz": 24363, - "crowns": 24364, - "skeletons": 24365, - "##cona": 24366, - "bonnet": 24367, - "257": 24368, - "dummy": 24369, - "globalization": 24370, - "simeon": 24371, - "sleeper": 24372, - "mandal": 24373, - "differentiated": 24374, - "##crow": 24375, - "##mare": 24376, - "milne": 24377, - "bundled": 24378, - "exasperated": 24379, - "talmud": 24380, - "owes": 24381, - "segregated": 24382, - "##feng": 24383, - "##uary": 24384, - "dentist": 24385, - "piracy": 24386, - "props": 24387, - "##rang": 24388, - "devlin": 24389, - "##torium": 24390, - "malicious": 24391, - "paws": 24392, - "##laid": 24393, - "dependency": 24394, - "##ergy": 24395, - "##fers": 24396, - "##enna": 24397, - "258": 24398, - "pistons": 24399, - "rourke": 24400, - "jed": 24401, - "grammatical": 24402, - "tres": 24403, - "maha": 24404, - "wig": 24405, - "512": 24406, - "ghostly": 24407, - "jayne": 24408, - "##achal": 24409, - "##creen": 24410, - "##ilis": 24411, - "##lins": 24412, - "##rence": 24413, - "designate": 24414, - "##with": 24415, - "arrogance": 24416, - "cambodian": 24417, - "clones": 24418, - "showdown": 24419, - "throttle": 24420, - "twain": 24421, - "##ception": 24422, - "lobes": 24423, - "metz": 24424, - "nagoya": 24425, - "335": 24426, - "braking": 24427, - "##furt": 24428, - "385": 24429, - 
"roaming": 24430, - "##minster": 24431, - "amin": 24432, - "crippled": 24433, - "##37": 24434, - "##llary": 24435, - "indifferent": 24436, - "hoffmann": 24437, - "idols": 24438, - "intimidating": 24439, - "1751": 24440, - "261": 24441, - "influenza": 24442, - "memo": 24443, - "onions": 24444, - "1748": 24445, - "bandage": 24446, - "consciously": 24447, - "##landa": 24448, - "##rage": 24449, - "clandestine": 24450, - "observes": 24451, - "swiped": 24452, - "tangle": 24453, - "##ener": 24454, - "##jected": 24455, - "##trum": 24456, - "##bill": 24457, - "##lta": 24458, - "hugs": 24459, - "congresses": 24460, - "josiah": 24461, - "spirited": 24462, - "##dek": 24463, - "humanist": 24464, - "managerial": 24465, - "filmmaking": 24466, - "inmate": 24467, - "rhymes": 24468, - "debuting": 24469, - "grimsby": 24470, - "ur": 24471, - "##laze": 24472, - "duplicate": 24473, - "vigor": 24474, - "##tf": 24475, - "republished": 24476, - "bolshevik": 24477, - "refurbishment": 24478, - "antibiotics": 24479, - "martini": 24480, - "methane": 24481, - "newscasts": 24482, - "royale": 24483, - "horizons": 24484, - "levant": 24485, - "iain": 24486, - "visas": 24487, - "##ischen": 24488, - "paler": 24489, - "##around": 24490, - "manifestation": 24491, - "snuck": 24492, - "alf": 24493, - "chop": 24494, - "futile": 24495, - "pedestal": 24496, - "rehab": 24497, - "##kat": 24498, - "bmg": 24499, - "kerman": 24500, - "res": 24501, - "fairbanks": 24502, - "jarrett": 24503, - "abstraction": 24504, - "saharan": 24505, - "##zek": 24506, - "1746": 24507, - "procedural": 24508, - "clearer": 24509, - "kincaid": 24510, - "sash": 24511, - "luciano": 24512, - "##ffey": 24513, - "crunch": 24514, - "helmut": 24515, - "##vara": 24516, - "revolutionaries": 24517, - "##tute": 24518, - "creamy": 24519, - "leach": 24520, - "##mmon": 24521, - "1747": 24522, - "permitting": 24523, - "nes": 24524, - "plight": 24525, - "wendell": 24526, - "##lese": 24527, - "contra": 24528, - "ts": 24529, - "clancy": 24530, - "ipa": 24531, - "mach": 24532, - "staples": 24533, - "autopsy": 24534, - "disturbances": 24535, - "nueva": 24536, - "karin": 24537, - "pontiac": 24538, - "##uding": 24539, - "proxy": 24540, - "venerable": 24541, - "haunt": 24542, - "leto": 24543, - "bergman": 24544, - "expands": 24545, - "##helm": 24546, - "wal": 24547, - "##pipe": 24548, - "canning": 24549, - "celine": 24550, - "cords": 24551, - "obesity": 24552, - "##enary": 24553, - "intrusion": 24554, - "planner": 24555, - "##phate": 24556, - "reasoned": 24557, - "sequencing": 24558, - "307": 24559, - "harrow": 24560, - "##chon": 24561, - "##dora": 24562, - "marred": 24563, - "mcintyre": 24564, - "repay": 24565, - "tarzan": 24566, - "darting": 24567, - "248": 24568, - "harrisburg": 24569, - "margarita": 24570, - "repulsed": 24571, - "##hur": 24572, - "##lding": 24573, - "belinda": 24574, - "hamburger": 24575, - "novo": 24576, - "compliant": 24577, - "runways": 24578, - "bingham": 24579, - "registrar": 24580, - "skyscraper": 24581, - "ic": 24582, - "cuthbert": 24583, - "improvisation": 24584, - "livelihood": 24585, - "##corp": 24586, - "##elial": 24587, - "admiring": 24588, - "##dened": 24589, - "sporadic": 24590, - "believer": 24591, - "casablanca": 24592, - "popcorn": 24593, - "##29": 24594, - "asha": 24595, - "shovel": 24596, - "##bek": 24597, - "##dice": 24598, - "coiled": 24599, - "tangible": 24600, - "##dez": 24601, - "casper": 24602, - "elsie": 24603, - "resin": 24604, - "tenderness": 24605, - "rectory": 24606, - "##ivision": 24607, - "avail": 24608, - "sonar": 24609, - 
"##mori": 24610, - "boutique": 24611, - "##dier": 24612, - "guerre": 24613, - "bathed": 24614, - "upbringing": 24615, - "vaulted": 24616, - "sandals": 24617, - "blessings": 24618, - "##naut": 24619, - "##utnant": 24620, - "1680": 24621, - "306": 24622, - "foxes": 24623, - "pia": 24624, - "corrosion": 24625, - "hesitantly": 24626, - "confederates": 24627, - "crystalline": 24628, - "footprints": 24629, - "shapiro": 24630, - "tirana": 24631, - "valentin": 24632, - "drones": 24633, - "45th": 24634, - "microscope": 24635, - "shipments": 24636, - "texted": 24637, - "inquisition": 24638, - "wry": 24639, - "guernsey": 24640, - "unauthorized": 24641, - "resigning": 24642, - "760": 24643, - "ripple": 24644, - "schubert": 24645, - "stu": 24646, - "reassure": 24647, - "felony": 24648, - "##ardo": 24649, - "brittle": 24650, - "koreans": 24651, - "##havan": 24652, - "##ives": 24653, - "dun": 24654, - "implicit": 24655, - "tyres": 24656, - "##aldi": 24657, - "##lth": 24658, - "magnolia": 24659, - "##ehan": 24660, - "##puri": 24661, - "##poulos": 24662, - "aggressively": 24663, - "fei": 24664, - "gr": 24665, - "familiarity": 24666, - "##poo": 24667, - "indicative": 24668, - "##trust": 24669, - "fundamentally": 24670, - "jimmie": 24671, - "overrun": 24672, - "395": 24673, - "anchors": 24674, - "moans": 24675, - "##opus": 24676, - "britannia": 24677, - "armagh": 24678, - "##ggle": 24679, - "purposely": 24680, - "seizing": 24681, - "##vao": 24682, - "bewildered": 24683, - "mundane": 24684, - "avoidance": 24685, - "cosmopolitan": 24686, - "geometridae": 24687, - "quartermaster": 24688, - "caf": 24689, - "415": 24690, - "chatter": 24691, - "engulfed": 24692, - "gleam": 24693, - "purge": 24694, - "##icate": 24695, - "juliette": 24696, - "jurisprudence": 24697, - "guerra": 24698, - "revisions": 24699, - "##bn": 24700, - "casimir": 24701, - "brew": 24702, - "##jm": 24703, - "1749": 24704, - "clapton": 24705, - "cloudy": 24706, - "conde": 24707, - "hermitage": 24708, - "278": 24709, - "simulations": 24710, - "torches": 24711, - "vincenzo": 24712, - "matteo": 24713, - "##rill": 24714, - "hidalgo": 24715, - "booming": 24716, - "westbound": 24717, - "accomplishment": 24718, - "tentacles": 24719, - "unaffected": 24720, - "##sius": 24721, - "annabelle": 24722, - "flopped": 24723, - "sloping": 24724, - "##litz": 24725, - "dreamer": 24726, - "interceptor": 24727, - "vu": 24728, - "##loh": 24729, - "consecration": 24730, - "copying": 24731, - "messaging": 24732, - "breaker": 24733, - "climates": 24734, - "hospitalized": 24735, - "1752": 24736, - "torino": 24737, - "afternoons": 24738, - "winfield": 24739, - "witnessing": 24740, - "##teacher": 24741, - "breakers": 24742, - "choirs": 24743, - "sawmill": 24744, - "coldly": 24745, - "##ege": 24746, - "sipping": 24747, - "haste": 24748, - "uninhabited": 24749, - "conical": 24750, - "bibliography": 24751, - "pamphlets": 24752, - "severn": 24753, - "edict": 24754, - "##oca": 24755, - "deux": 24756, - "illnesses": 24757, - "grips": 24758, - "##pl": 24759, - "rehearsals": 24760, - "sis": 24761, - "thinkers": 24762, - "tame": 24763, - "##keepers": 24764, - "1690": 24765, - "acacia": 24766, - "reformer": 24767, - "##osed": 24768, - "##rys": 24769, - "shuffling": 24770, - "##iring": 24771, - "##shima": 24772, - "eastbound": 24773, - "ionic": 24774, - "rhea": 24775, - "flees": 24776, - "littered": 24777, - "##oum": 24778, - "rocker": 24779, - "vomiting": 24780, - "groaning": 24781, - "champ": 24782, - "overwhelmingly": 24783, - "civilizations": 24784, - "paces": 24785, - "sloop": 
24786, - "adoptive": 24787, - "##tish": 24788, - "skaters": 24789, - "##vres": 24790, - "aiding": 24791, - "mango": 24792, - "##joy": 24793, - "nikola": 24794, - "shriek": 24795, - "##ignon": 24796, - "pharmaceuticals": 24797, - "##mg": 24798, - "tuna": 24799, - "calvert": 24800, - "gustavo": 24801, - "stocked": 24802, - "yearbook": 24803, - "##urai": 24804, - "##mana": 24805, - "computed": 24806, - "subsp": 24807, - "riff": 24808, - "hanoi": 24809, - "kelvin": 24810, - "hamid": 24811, - "moors": 24812, - "pastures": 24813, - "summons": 24814, - "jihad": 24815, - "nectar": 24816, - "##ctors": 24817, - "bayou": 24818, - "untitled": 24819, - "pleasing": 24820, - "vastly": 24821, - "republics": 24822, - "intellect": 24823, - "##η": 24824, - "##ulio": 24825, - "##tou": 24826, - "crumbling": 24827, - "stylistic": 24828, - "sb": 24829, - "##ی": 24830, - "consolation": 24831, - "frequented": 24832, - "h₂o": 24833, - "walden": 24834, - "widows": 24835, - "##iens": 24836, - "404": 24837, - "##ignment": 24838, - "chunks": 24839, - "improves": 24840, - "288": 24841, - "grit": 24842, - "recited": 24843, - "##dev": 24844, - "snarl": 24845, - "sociological": 24846, - "##arte": 24847, - "##gul": 24848, - "inquired": 24849, - "##held": 24850, - "bruise": 24851, - "clube": 24852, - "consultancy": 24853, - "homogeneous": 24854, - "hornets": 24855, - "multiplication": 24856, - "pasta": 24857, - "prick": 24858, - "savior": 24859, - "##grin": 24860, - "##kou": 24861, - "##phile": 24862, - "yoon": 24863, - "##gara": 24864, - "grimes": 24865, - "vanishing": 24866, - "cheering": 24867, - "reacting": 24868, - "bn": 24869, - "distillery": 24870, - "##quisite": 24871, - "##vity": 24872, - "coe": 24873, - "dockyard": 24874, - "massif": 24875, - "##jord": 24876, - "escorts": 24877, - "voss": 24878, - "##valent": 24879, - "byte": 24880, - "chopped": 24881, - "hawke": 24882, - "illusions": 24883, - "workings": 24884, - "floats": 24885, - "##koto": 24886, - "##vac": 24887, - "kv": 24888, - "annapolis": 24889, - "madden": 24890, - "##onus": 24891, - "alvaro": 24892, - "noctuidae": 24893, - "##cum": 24894, - "##scopic": 24895, - "avenge": 24896, - "steamboat": 24897, - "forte": 24898, - "illustrates": 24899, - "erika": 24900, - "##trip": 24901, - "570": 24902, - "dew": 24903, - "nationalities": 24904, - "bran": 24905, - "manifested": 24906, - "thirsty": 24907, - "diversified": 24908, - "muscled": 24909, - "reborn": 24910, - "##standing": 24911, - "arson": 24912, - "##lessness": 24913, - "##dran": 24914, - "##logram": 24915, - "##boys": 24916, - "##kushima": 24917, - "##vious": 24918, - "willoughby": 24919, - "##phobia": 24920, - "286": 24921, - "alsace": 24922, - "dashboard": 24923, - "yuki": 24924, - "##chai": 24925, - "granville": 24926, - "myspace": 24927, - "publicized": 24928, - "tricked": 24929, - "##gang": 24930, - "adjective": 24931, - "##ater": 24932, - "relic": 24933, - "reorganisation": 24934, - "enthusiastically": 24935, - "indications": 24936, - "saxe": 24937, - "##lassified": 24938, - "consolidate": 24939, - "iec": 24940, - "padua": 24941, - "helplessly": 24942, - "ramps": 24943, - "renaming": 24944, - "regulars": 24945, - "pedestrians": 24946, - "accents": 24947, - "convicts": 24948, - "inaccurate": 24949, - "lowers": 24950, - "mana": 24951, - "##pati": 24952, - "barrie": 24953, - "bjp": 24954, - "outta": 24955, - "someplace": 24956, - "berwick": 24957, - "flanking": 24958, - "invoked": 24959, - "marrow": 24960, - "sparsely": 24961, - "excerpts": 24962, - "clothed": 24963, - "rei": 24964, - "##ginal": 24965, 
- "wept": 24966, - "##straße": 24967, - "##vish": 24968, - "alexa": 24969, - "excel": 24970, - "##ptive": 24971, - "membranes": 24972, - "aquitaine": 24973, - "creeks": 24974, - "cutler": 24975, - "sheppard": 24976, - "implementations": 24977, - "ns": 24978, - "##dur": 24979, - "fragrance": 24980, - "budge": 24981, - "concordia": 24982, - "magnesium": 24983, - "marcelo": 24984, - "##antes": 24985, - "gladly": 24986, - "vibrating": 24987, - "##rral": 24988, - "##ggles": 24989, - "montrose": 24990, - "##omba": 24991, - "lew": 24992, - "seamus": 24993, - "1630": 24994, - "cocky": 24995, - "##ament": 24996, - "##uen": 24997, - "bjorn": 24998, - "##rrick": 24999, - "fielder": 25000, - "fluttering": 25001, - "##lase": 25002, - "methyl": 25003, - "kimberley": 25004, - "mcdowell": 25005, - "reductions": 25006, - "barbed": 25007, - "##jic": 25008, - "##tonic": 25009, - "aeronautical": 25010, - "condensed": 25011, - "distracting": 25012, - "##promising": 25013, - "huffed": 25014, - "##cala": 25015, - "##sle": 25016, - "claudius": 25017, - "invincible": 25018, - "missy": 25019, - "pious": 25020, - "balthazar": 25021, - "ci": 25022, - "##lang": 25023, - "butte": 25024, - "combo": 25025, - "orson": 25026, - "##dication": 25027, - "myriad": 25028, - "1707": 25029, - "silenced": 25030, - "##fed": 25031, - "##rh": 25032, - "coco": 25033, - "netball": 25034, - "yourselves": 25035, - "##oza": 25036, - "clarify": 25037, - "heller": 25038, - "peg": 25039, - "durban": 25040, - "etudes": 25041, - "offender": 25042, - "roast": 25043, - "blackmail": 25044, - "curvature": 25045, - "##woods": 25046, - "vile": 25047, - "309": 25048, - "illicit": 25049, - "suriname": 25050, - "##linson": 25051, - "overture": 25052, - "1685": 25053, - "bubbling": 25054, - "gymnast": 25055, - "tucking": 25056, - "##mming": 25057, - "##ouin": 25058, - "maldives": 25059, - "##bala": 25060, - "gurney": 25061, - "##dda": 25062, - "##eased": 25063, - "##oides": 25064, - "backside": 25065, - "pinto": 25066, - "jars": 25067, - "racehorse": 25068, - "tending": 25069, - "##rdial": 25070, - "baronetcy": 25071, - "wiener": 25072, - "duly": 25073, - "##rke": 25074, - "barbarian": 25075, - "cupping": 25076, - "flawed": 25077, - "##thesis": 25078, - "bertha": 25079, - "pleistocene": 25080, - "puddle": 25081, - "swearing": 25082, - "##nob": 25083, - "##tically": 25084, - "fleeting": 25085, - "prostate": 25086, - "amulet": 25087, - "educating": 25088, - "##mined": 25089, - "##iti": 25090, - "##tler": 25091, - "75th": 25092, - "jens": 25093, - "respondents": 25094, - "analytics": 25095, - "cavaliers": 25096, - "papacy": 25097, - "raju": 25098, - "##iente": 25099, - "##ulum": 25100, - "##tip": 25101, - "funnel": 25102, - "271": 25103, - "disneyland": 25104, - "##lley": 25105, - "sociologist": 25106, - "##iam": 25107, - "2500": 25108, - "faulkner": 25109, - "louvre": 25110, - "menon": 25111, - "##dson": 25112, - "276": 25113, - "##ower": 25114, - "afterlife": 25115, - "mannheim": 25116, - "peptide": 25117, - "referees": 25118, - "comedians": 25119, - "meaningless": 25120, - "##anger": 25121, - "##laise": 25122, - "fabrics": 25123, - "hurley": 25124, - "renal": 25125, - "sleeps": 25126, - "##bour": 25127, - "##icle": 25128, - "breakout": 25129, - "kristin": 25130, - "roadside": 25131, - "animator": 25132, - "clover": 25133, - "disdain": 25134, - "unsafe": 25135, - "redesign": 25136, - "##urity": 25137, - "firth": 25138, - "barnsley": 25139, - "portage": 25140, - "reset": 25141, - "narrows": 25142, - "268": 25143, - "commandos": 25144, - "expansive": 
25145, - "speechless": 25146, - "tubular": 25147, - "##lux": 25148, - "essendon": 25149, - "eyelashes": 25150, - "smashwords": 25151, - "##yad": 25152, - "##bang": 25153, - "##claim": 25154, - "craved": 25155, - "sprinted": 25156, - "chet": 25157, - "somme": 25158, - "astor": 25159, - "wrocław": 25160, - "orton": 25161, - "266": 25162, - "bane": 25163, - "##erving": 25164, - "##uing": 25165, - "mischief": 25166, - "##amps": 25167, - "##sund": 25168, - "scaling": 25169, - "terre": 25170, - "##xious": 25171, - "impairment": 25172, - "offenses": 25173, - "undermine": 25174, - "moi": 25175, - "soy": 25176, - "contiguous": 25177, - "arcadia": 25178, - "inuit": 25179, - "seam": 25180, - "##tops": 25181, - "macbeth": 25182, - "rebelled": 25183, - "##icative": 25184, - "##iot": 25185, - "590": 25186, - "elaborated": 25187, - "frs": 25188, - "uniformed": 25189, - "##dberg": 25190, - "259": 25191, - "powerless": 25192, - "priscilla": 25193, - "stimulated": 25194, - "980": 25195, - "qc": 25196, - "arboretum": 25197, - "frustrating": 25198, - "trieste": 25199, - "bullock": 25200, - "##nified": 25201, - "enriched": 25202, - "glistening": 25203, - "intern": 25204, - "##adia": 25205, - "locus": 25206, - "nouvelle": 25207, - "ollie": 25208, - "ike": 25209, - "lash": 25210, - "starboard": 25211, - "ee": 25212, - "tapestry": 25213, - "headlined": 25214, - "hove": 25215, - "rigged": 25216, - "##vite": 25217, - "pollock": 25218, - "##yme": 25219, - "thrive": 25220, - "clustered": 25221, - "cas": 25222, - "roi": 25223, - "gleamed": 25224, - "olympiad": 25225, - "##lino": 25226, - "pressured": 25227, - "regimes": 25228, - "##hosis": 25229, - "##lick": 25230, - "ripley": 25231, - "##ophone": 25232, - "kickoff": 25233, - "gallon": 25234, - "rockwell": 25235, - "##arable": 25236, - "crusader": 25237, - "glue": 25238, - "revolutions": 25239, - "scrambling": 25240, - "1714": 25241, - "grover": 25242, - "##jure": 25243, - "englishman": 25244, - "aztec": 25245, - "263": 25246, - "contemplating": 25247, - "coven": 25248, - "ipad": 25249, - "preach": 25250, - "triumphant": 25251, - "tufts": 25252, - "##esian": 25253, - "rotational": 25254, - "##phus": 25255, - "328": 25256, - "falkland": 25257, - "##brates": 25258, - "strewn": 25259, - "clarissa": 25260, - "rejoin": 25261, - "environmentally": 25262, - "glint": 25263, - "banded": 25264, - "drenched": 25265, - "moat": 25266, - "albanians": 25267, - "johor": 25268, - "rr": 25269, - "maestro": 25270, - "malley": 25271, - "nouveau": 25272, - "shaded": 25273, - "taxonomy": 25274, - "v6": 25275, - "adhere": 25276, - "bunk": 25277, - "airfields": 25278, - "##ritan": 25279, - "1741": 25280, - "encompass": 25281, - "remington": 25282, - "tran": 25283, - "##erative": 25284, - "amelie": 25285, - "mazda": 25286, - "friar": 25287, - "morals": 25288, - "passions": 25289, - "##zai": 25290, - "breadth": 25291, - "vis": 25292, - "##hae": 25293, - "argus": 25294, - "burnham": 25295, - "caressing": 25296, - "insider": 25297, - "rudd": 25298, - "##imov": 25299, - "##mini": 25300, - "##rso": 25301, - "italianate": 25302, - "murderous": 25303, - "textual": 25304, - "wainwright": 25305, - "armada": 25306, - "bam": 25307, - "weave": 25308, - "timer": 25309, - "##taken": 25310, - "##nh": 25311, - "fra": 25312, - "##crest": 25313, - "ardent": 25314, - "salazar": 25315, - "taps": 25316, - "tunis": 25317, - "##ntino": 25318, - "allegro": 25319, - "gland": 25320, - "philanthropic": 25321, - "##chester": 25322, - "implication": 25323, - "##optera": 25324, - "esq": 25325, - "judas": 25326, - 
"noticeably": 25327, - "wynn": 25328, - "##dara": 25329, - "inched": 25330, - "indexed": 25331, - "crises": 25332, - "villiers": 25333, - "bandit": 25334, - "royalties": 25335, - "patterned": 25336, - "cupboard": 25337, - "interspersed": 25338, - "accessory": 25339, - "isla": 25340, - "kendrick": 25341, - "entourage": 25342, - "stitches": 25343, - "##esthesia": 25344, - "headwaters": 25345, - "##ior": 25346, - "interlude": 25347, - "distraught": 25348, - "draught": 25349, - "1727": 25350, - "##basket": 25351, - "biased": 25352, - "sy": 25353, - "transient": 25354, - "triad": 25355, - "subgenus": 25356, - "adapting": 25357, - "kidd": 25358, - "shortstop": 25359, - "##umatic": 25360, - "dimly": 25361, - "spiked": 25362, - "mcleod": 25363, - "reprint": 25364, - "nellie": 25365, - "pretoria": 25366, - "windmill": 25367, - "##cek": 25368, - "singled": 25369, - "##mps": 25370, - "273": 25371, - "reunite": 25372, - "##orous": 25373, - "747": 25374, - "bankers": 25375, - "outlying": 25376, - "##omp": 25377, - "##ports": 25378, - "##tream": 25379, - "apologies": 25380, - "cosmetics": 25381, - "patsy": 25382, - "##deh": 25383, - "##ocks": 25384, - "##yson": 25385, - "bender": 25386, - "nantes": 25387, - "serene": 25388, - "##nad": 25389, - "lucha": 25390, - "mmm": 25391, - "323": 25392, - "##cius": 25393, - "##gli": 25394, - "cmll": 25395, - "coinage": 25396, - "nestor": 25397, - "juarez": 25398, - "##rook": 25399, - "smeared": 25400, - "sprayed": 25401, - "twitching": 25402, - "sterile": 25403, - "irina": 25404, - "embodied": 25405, - "juveniles": 25406, - "enveloped": 25407, - "miscellaneous": 25408, - "cancers": 25409, - "dq": 25410, - "gulped": 25411, - "luisa": 25412, - "crested": 25413, - "swat": 25414, - "donegal": 25415, - "ref": 25416, - "##anov": 25417, - "##acker": 25418, - "hearst": 25419, - "mercantile": 25420, - "##lika": 25421, - "doorbell": 25422, - "ua": 25423, - "vicki": 25424, - "##alla": 25425, - "##som": 25426, - "bilbao": 25427, - "psychologists": 25428, - "stryker": 25429, - "sw": 25430, - "horsemen": 25431, - "turkmenistan": 25432, - "wits": 25433, - "##national": 25434, - "anson": 25435, - "mathew": 25436, - "screenings": 25437, - "##umb": 25438, - "rihanna": 25439, - "##agne": 25440, - "##nessy": 25441, - "aisles": 25442, - "##iani": 25443, - "##osphere": 25444, - "hines": 25445, - "kenton": 25446, - "saskatoon": 25447, - "tasha": 25448, - "truncated": 25449, - "##champ": 25450, - "##itan": 25451, - "mildred": 25452, - "advises": 25453, - "fredrik": 25454, - "interpreting": 25455, - "inhibitors": 25456, - "##athi": 25457, - "spectroscopy": 25458, - "##hab": 25459, - "##kong": 25460, - "karim": 25461, - "panda": 25462, - "##oia": 25463, - "##nail": 25464, - "##vc": 25465, - "conqueror": 25466, - "kgb": 25467, - "leukemia": 25468, - "##dity": 25469, - "arrivals": 25470, - "cheered": 25471, - "pisa": 25472, - "phosphorus": 25473, - "shielded": 25474, - "##riated": 25475, - "mammal": 25476, - "unitarian": 25477, - "urgently": 25478, - "chopin": 25479, - "sanitary": 25480, - "##mission": 25481, - "spicy": 25482, - "drugged": 25483, - "hinges": 25484, - "##tort": 25485, - "tipping": 25486, - "trier": 25487, - "impoverished": 25488, - "westchester": 25489, - "##caster": 25490, - "267": 25491, - "epoch": 25492, - "nonstop": 25493, - "##gman": 25494, - "##khov": 25495, - "aromatic": 25496, - "centrally": 25497, - "cerro": 25498, - "##tively": 25499, - "##vio": 25500, - "billions": 25501, - "modulation": 25502, - "sedimentary": 25503, - "283": 25504, - "facilitating": 25505, - 
"outrageous": 25506, - "goldstein": 25507, - "##eak": 25508, - "##kt": 25509, - "ld": 25510, - "maitland": 25511, - "penultimate": 25512, - "pollard": 25513, - "##dance": 25514, - "fleets": 25515, - "spaceship": 25516, - "vertebrae": 25517, - "##nig": 25518, - "alcoholism": 25519, - "als": 25520, - "recital": 25521, - "##bham": 25522, - "##ference": 25523, - "##omics": 25524, - "m2": 25525, - "##bm": 25526, - "trois": 25527, - "##tropical": 25528, - "##в": 25529, - "commemorates": 25530, - "##meric": 25531, - "marge": 25532, - "##raction": 25533, - "1643": 25534, - "670": 25535, - "cosmetic": 25536, - "ravaged": 25537, - "##ige": 25538, - "catastrophe": 25539, - "eng": 25540, - "##shida": 25541, - "albrecht": 25542, - "arterial": 25543, - "bellamy": 25544, - "decor": 25545, - "harmon": 25546, - "##rde": 25547, - "bulbs": 25548, - "synchronized": 25549, - "vito": 25550, - "easiest": 25551, - "shetland": 25552, - "shielding": 25553, - "wnba": 25554, - "##glers": 25555, - "##ssar": 25556, - "##riam": 25557, - "brianna": 25558, - "cumbria": 25559, - "##aceous": 25560, - "##rard": 25561, - "cores": 25562, - "thayer": 25563, - "##nsk": 25564, - "brood": 25565, - "hilltop": 25566, - "luminous": 25567, - "carts": 25568, - "keynote": 25569, - "larkin": 25570, - "logos": 25571, - "##cta": 25572, - "##ا": 25573, - "##mund": 25574, - "##quay": 25575, - "lilith": 25576, - "tinted": 25577, - "277": 25578, - "wrestle": 25579, - "mobilization": 25580, - "##uses": 25581, - "sequential": 25582, - "siam": 25583, - "bloomfield": 25584, - "takahashi": 25585, - "274": 25586, - "##ieving": 25587, - "presenters": 25588, - "ringo": 25589, - "blazed": 25590, - "witty": 25591, - "##oven": 25592, - "##ignant": 25593, - "devastation": 25594, - "haydn": 25595, - "harmed": 25596, - "newt": 25597, - "therese": 25598, - "##peed": 25599, - "gershwin": 25600, - "molina": 25601, - "rabbis": 25602, - "sudanese": 25603, - "001": 25604, - "innate": 25605, - "restarted": 25606, - "##sack": 25607, - "##fus": 25608, - "slices": 25609, - "wb": 25610, - "##shah": 25611, - "enroll": 25612, - "hypothetical": 25613, - "hysterical": 25614, - "1743": 25615, - "fabio": 25616, - "indefinite": 25617, - "warped": 25618, - "##hg": 25619, - "exchanging": 25620, - "525": 25621, - "unsuitable": 25622, - "##sboro": 25623, - "gallo": 25624, - "1603": 25625, - "bret": 25626, - "cobalt": 25627, - "homemade": 25628, - "##hunter": 25629, - "mx": 25630, - "operatives": 25631, - "##dhar": 25632, - "terraces": 25633, - "durable": 25634, - "latch": 25635, - "pens": 25636, - "whorls": 25637, - "##ctuated": 25638, - "##eaux": 25639, - "billing": 25640, - "ligament": 25641, - "succumbed": 25642, - "##gly": 25643, - "regulators": 25644, - "spawn": 25645, - "##brick": 25646, - "##stead": 25647, - "filmfare": 25648, - "rochelle": 25649, - "##nzo": 25650, - "1725": 25651, - "circumstance": 25652, - "saber": 25653, - "supplements": 25654, - "##nsky": 25655, - "##tson": 25656, - "crowe": 25657, - "wellesley": 25658, - "carrot": 25659, - "##9th": 25660, - "##movable": 25661, - "primate": 25662, - "drury": 25663, - "sincerely": 25664, - "topical": 25665, - "##mad": 25666, - "##rao": 25667, - "callahan": 25668, - "kyiv": 25669, - "smarter": 25670, - "tits": 25671, - "undo": 25672, - "##yeh": 25673, - "announcements": 25674, - "anthologies": 25675, - "barrio": 25676, - "nebula": 25677, - "##islaus": 25678, - "##shaft": 25679, - "##tyn": 25680, - "bodyguards": 25681, - "2021": 25682, - "assassinate": 25683, - "barns": 25684, - "emmett": 25685, - "scully": 25686, - 
"##mah": 25687, - "##yd": 25688, - "##eland": 25689, - "##tino": 25690, - "##itarian": 25691, - "demoted": 25692, - "gorman": 25693, - "lashed": 25694, - "prized": 25695, - "adventist": 25696, - "writ": 25697, - "##gui": 25698, - "alla": 25699, - "invertebrates": 25700, - "##ausen": 25701, - "1641": 25702, - "amman": 25703, - "1742": 25704, - "align": 25705, - "healy": 25706, - "redistribution": 25707, - "##gf": 25708, - "##rize": 25709, - "insulation": 25710, - "##drop": 25711, - "adherents": 25712, - "hezbollah": 25713, - "vitro": 25714, - "ferns": 25715, - "yanking": 25716, - "269": 25717, - "php": 25718, - "registering": 25719, - "uppsala": 25720, - "cheerleading": 25721, - "confines": 25722, - "mischievous": 25723, - "tully": 25724, - "##ross": 25725, - "49th": 25726, - "docked": 25727, - "roam": 25728, - "stipulated": 25729, - "pumpkin": 25730, - "##bry": 25731, - "prompt": 25732, - "##ezer": 25733, - "blindly": 25734, - "shuddering": 25735, - "craftsmen": 25736, - "frail": 25737, - "scented": 25738, - "katharine": 25739, - "scramble": 25740, - "shaggy": 25741, - "sponge": 25742, - "helix": 25743, - "zaragoza": 25744, - "279": 25745, - "##52": 25746, - "43rd": 25747, - "backlash": 25748, - "fontaine": 25749, - "seizures": 25750, - "posse": 25751, - "cowan": 25752, - "nonfiction": 25753, - "telenovela": 25754, - "wwii": 25755, - "hammered": 25756, - "undone": 25757, - "##gpur": 25758, - "encircled": 25759, - "irs": 25760, - "##ivation": 25761, - "artefacts": 25762, - "oneself": 25763, - "searing": 25764, - "smallpox": 25765, - "##belle": 25766, - "##osaurus": 25767, - "shandong": 25768, - "breached": 25769, - "upland": 25770, - "blushing": 25771, - "rankin": 25772, - "infinitely": 25773, - "psyche": 25774, - "tolerated": 25775, - "docking": 25776, - "evicted": 25777, - "##col": 25778, - "unmarked": 25779, - "##lving": 25780, - "gnome": 25781, - "lettering": 25782, - "litres": 25783, - "musique": 25784, - "##oint": 25785, - "benevolent": 25786, - "##jal": 25787, - "blackened": 25788, - "##anna": 25789, - "mccall": 25790, - "racers": 25791, - "tingle": 25792, - "##ocene": 25793, - "##orestation": 25794, - "introductions": 25795, - "radically": 25796, - "292": 25797, - "##hiff": 25798, - "##باد": 25799, - "1610": 25800, - "1739": 25801, - "munchen": 25802, - "plead": 25803, - "##nka": 25804, - "condo": 25805, - "scissors": 25806, - "##sight": 25807, - "##tens": 25808, - "apprehension": 25809, - "##cey": 25810, - "##yin": 25811, - "hallmark": 25812, - "watering": 25813, - "formulas": 25814, - "sequels": 25815, - "##llas": 25816, - "aggravated": 25817, - "bae": 25818, - "commencing": 25819, - "##building": 25820, - "enfield": 25821, - "prohibits": 25822, - "marne": 25823, - "vedic": 25824, - "civilized": 25825, - "euclidean": 25826, - "jagger": 25827, - "beforehand": 25828, - "blasts": 25829, - "dumont": 25830, - "##arney": 25831, - "##nem": 25832, - "740": 25833, - "conversions": 25834, - "hierarchical": 25835, - "rios": 25836, - "simulator": 25837, - "##dya": 25838, - "##lellan": 25839, - "hedges": 25840, - "oleg": 25841, - "thrusts": 25842, - "shadowed": 25843, - "darby": 25844, - "maximize": 25845, - "1744": 25846, - "gregorian": 25847, - "##nded": 25848, - "##routed": 25849, - "sham": 25850, - "unspecified": 25851, - "##hog": 25852, - "emory": 25853, - "factual": 25854, - "##smo": 25855, - "##tp": 25856, - "fooled": 25857, - "##rger": 25858, - "ortega": 25859, - "wellness": 25860, - "marlon": 25861, - "##oton": 25862, - "##urance": 25863, - "casket": 25864, - "keating": 25865, - 
"ley": 25866, - "enclave": 25867, - "##ayan": 25868, - "char": 25869, - "influencing": 25870, - "jia": 25871, - "##chenko": 25872, - "412": 25873, - "ammonia": 25874, - "erebidae": 25875, - "incompatible": 25876, - "violins": 25877, - "cornered": 25878, - "##arat": 25879, - "grooves": 25880, - "astronauts": 25881, - "columbian": 25882, - "rampant": 25883, - "fabrication": 25884, - "kyushu": 25885, - "mahmud": 25886, - "vanish": 25887, - "##dern": 25888, - "mesopotamia": 25889, - "##lete": 25890, - "ict": 25891, - "##rgen": 25892, - "caspian": 25893, - "kenji": 25894, - "pitted": 25895, - "##vered": 25896, - "999": 25897, - "grimace": 25898, - "roanoke": 25899, - "tchaikovsky": 25900, - "twinned": 25901, - "##analysis": 25902, - "##awan": 25903, - "xinjiang": 25904, - "arias": 25905, - "clemson": 25906, - "kazakh": 25907, - "sizable": 25908, - "1662": 25909, - "##khand": 25910, - "##vard": 25911, - "plunge": 25912, - "tatum": 25913, - "vittorio": 25914, - "##nden": 25915, - "cholera": 25916, - "##dana": 25917, - "##oper": 25918, - "bracing": 25919, - "indifference": 25920, - "projectile": 25921, - "superliga": 25922, - "##chee": 25923, - "realises": 25924, - "upgrading": 25925, - "299": 25926, - "porte": 25927, - "retribution": 25928, - "##vies": 25929, - "nk": 25930, - "stil": 25931, - "##resses": 25932, - "ama": 25933, - "bureaucracy": 25934, - "blackberry": 25935, - "bosch": 25936, - "testosterone": 25937, - "collapses": 25938, - "greer": 25939, - "##pathic": 25940, - "ioc": 25941, - "fifties": 25942, - "malls": 25943, - "##erved": 25944, - "bao": 25945, - "baskets": 25946, - "adolescents": 25947, - "siegfried": 25948, - "##osity": 25949, - "##tosis": 25950, - "mantra": 25951, - "detecting": 25952, - "existent": 25953, - "fledgling": 25954, - "##cchi": 25955, - "dissatisfied": 25956, - "gan": 25957, - "telecommunication": 25958, - "mingled": 25959, - "sobbed": 25960, - "6000": 25961, - "controversies": 25962, - "outdated": 25963, - "taxis": 25964, - "##raus": 25965, - "fright": 25966, - "slams": 25967, - "##lham": 25968, - "##fect": 25969, - "##tten": 25970, - "detectors": 25971, - "fetal": 25972, - "tanned": 25973, - "##uw": 25974, - "fray": 25975, - "goth": 25976, - "olympian": 25977, - "skipping": 25978, - "mandates": 25979, - "scratches": 25980, - "sheng": 25981, - "unspoken": 25982, - "hyundai": 25983, - "tracey": 25984, - "hotspur": 25985, - "restrictive": 25986, - "##buch": 25987, - "americana": 25988, - "mundo": 25989, - "##bari": 25990, - "burroughs": 25991, - "diva": 25992, - "vulcan": 25993, - "##6th": 25994, - "distinctions": 25995, - "thumping": 25996, - "##ngen": 25997, - "mikey": 25998, - "sheds": 25999, - "fide": 26000, - "rescues": 26001, - "springsteen": 26002, - "vested": 26003, - "valuation": 26004, - "##ece": 26005, - "##ely": 26006, - "pinnacle": 26007, - "rake": 26008, - "sylvie": 26009, - "##edo": 26010, - "almond": 26011, - "quivering": 26012, - "##irus": 26013, - "alteration": 26014, - "faltered": 26015, - "##wad": 26016, - "51st": 26017, - "hydra": 26018, - "ticked": 26019, - "##kato": 26020, - "recommends": 26021, - "##dicated": 26022, - "antigua": 26023, - "arjun": 26024, - "stagecoach": 26025, - "wilfred": 26026, - "trickle": 26027, - "pronouns": 26028, - "##pon": 26029, - "aryan": 26030, - "nighttime": 26031, - "##anian": 26032, - "gall": 26033, - "pea": 26034, - "stitch": 26035, - "##hei": 26036, - "leung": 26037, - "milos": 26038, - "##dini": 26039, - "eritrea": 26040, - "nexus": 26041, - "starved": 26042, - "snowfall": 26043, - "kant": 26044, - 
"parasitic": 26045, - "cot": 26046, - "discus": 26047, - "hana": 26048, - "strikers": 26049, - "appleton": 26050, - "kitchens": 26051, - "##erina": 26052, - "##partisan": 26053, - "##itha": 26054, - "##vius": 26055, - "disclose": 26056, - "metis": 26057, - "##channel": 26058, - "1701": 26059, - "tesla": 26060, - "##vera": 26061, - "fitch": 26062, - "1735": 26063, - "blooded": 26064, - "##tila": 26065, - "decimal": 26066, - "##tang": 26067, - "##bai": 26068, - "cyclones": 26069, - "eun": 26070, - "bottled": 26071, - "peas": 26072, - "pensacola": 26073, - "basha": 26074, - "bolivian": 26075, - "crabs": 26076, - "boil": 26077, - "lanterns": 26078, - "partridge": 26079, - "roofed": 26080, - "1645": 26081, - "necks": 26082, - "##phila": 26083, - "opined": 26084, - "patting": 26085, - "##kla": 26086, - "##lland": 26087, - "chuckles": 26088, - "volta": 26089, - "whereupon": 26090, - "##nche": 26091, - "devout": 26092, - "euroleague": 26093, - "suicidal": 26094, - "##dee": 26095, - "inherently": 26096, - "involuntary": 26097, - "knitting": 26098, - "nasser": 26099, - "##hide": 26100, - "puppets": 26101, - "colourful": 26102, - "courageous": 26103, - "southend": 26104, - "stills": 26105, - "miraculous": 26106, - "hodgson": 26107, - "richer": 26108, - "rochdale": 26109, - "ethernet": 26110, - "greta": 26111, - "uniting": 26112, - "prism": 26113, - "umm": 26114, - "##haya": 26115, - "##itical": 26116, - "##utation": 26117, - "deterioration": 26118, - "pointe": 26119, - "prowess": 26120, - "##ropriation": 26121, - "lids": 26122, - "scranton": 26123, - "billings": 26124, - "subcontinent": 26125, - "##koff": 26126, - "##scope": 26127, - "brute": 26128, - "kellogg": 26129, - "psalms": 26130, - "degraded": 26131, - "##vez": 26132, - "stanisław": 26133, - "##ructured": 26134, - "ferreira": 26135, - "pun": 26136, - "astonishing": 26137, - "gunnar": 26138, - "##yat": 26139, - "arya": 26140, - "prc": 26141, - "gottfried": 26142, - "##tight": 26143, - "excursion": 26144, - "##ographer": 26145, - "dina": 26146, - "##quil": 26147, - "##nare": 26148, - "huffington": 26149, - "illustrious": 26150, - "wilbur": 26151, - "gundam": 26152, - "verandah": 26153, - "##zard": 26154, - "naacp": 26155, - "##odle": 26156, - "constructive": 26157, - "fjord": 26158, - "kade": 26159, - "##naud": 26160, - "generosity": 26161, - "thrilling": 26162, - "baseline": 26163, - "cayman": 26164, - "frankish": 26165, - "plastics": 26166, - "accommodations": 26167, - "zoological": 26168, - "##fting": 26169, - "cedric": 26170, - "qb": 26171, - "motorized": 26172, - "##dome": 26173, - "##otted": 26174, - "squealed": 26175, - "tackled": 26176, - "canucks": 26177, - "budgets": 26178, - "situ": 26179, - "asthma": 26180, - "dail": 26181, - "gabled": 26182, - "grasslands": 26183, - "whimpered": 26184, - "writhing": 26185, - "judgments": 26186, - "##65": 26187, - "minnie": 26188, - "pv": 26189, - "##carbon": 26190, - "bananas": 26191, - "grille": 26192, - "domes": 26193, - "monique": 26194, - "odin": 26195, - "maguire": 26196, - "markham": 26197, - "tierney": 26198, - "##estra": 26199, - "##chua": 26200, - "libel": 26201, - "poke": 26202, - "speedy": 26203, - "atrium": 26204, - "laval": 26205, - "notwithstanding": 26206, - "##edly": 26207, - "fai": 26208, - "kala": 26209, - "##sur": 26210, - "robb": 26211, - "##sma": 26212, - "listings": 26213, - "luz": 26214, - "supplementary": 26215, - "tianjin": 26216, - "##acing": 26217, - "enzo": 26218, - "jd": 26219, - "ric": 26220, - "scanner": 26221, - "croats": 26222, - "transcribed": 26223, - "##49": 
26224, - "arden": 26225, - "cv": 26226, - "##hair": 26227, - "##raphy": 26228, - "##lver": 26229, - "##uy": 26230, - "357": 26231, - "seventies": 26232, - "staggering": 26233, - "alam": 26234, - "horticultural": 26235, - "hs": 26236, - "regression": 26237, - "timbers": 26238, - "blasting": 26239, - "##ounded": 26240, - "montagu": 26241, - "manipulating": 26242, - "##cit": 26243, - "catalytic": 26244, - "1550": 26245, - "troopers": 26246, - "##meo": 26247, - "condemnation": 26248, - "fitzpatrick": 26249, - "##oire": 26250, - "##roved": 26251, - "inexperienced": 26252, - "1670": 26253, - "castes": 26254, - "##lative": 26255, - "outing": 26256, - "314": 26257, - "dubois": 26258, - "flicking": 26259, - "quarrel": 26260, - "ste": 26261, - "learners": 26262, - "1625": 26263, - "iq": 26264, - "whistled": 26265, - "##class": 26266, - "282": 26267, - "classify": 26268, - "tariffs": 26269, - "temperament": 26270, - "355": 26271, - "folly": 26272, - "liszt": 26273, - "##yles": 26274, - "immersed": 26275, - "jordanian": 26276, - "ceasefire": 26277, - "apparel": 26278, - "extras": 26279, - "maru": 26280, - "fished": 26281, - "##bio": 26282, - "harta": 26283, - "stockport": 26284, - "assortment": 26285, - "craftsman": 26286, - "paralysis": 26287, - "transmitters": 26288, - "##cola": 26289, - "blindness": 26290, - "##wk": 26291, - "fatally": 26292, - "proficiency": 26293, - "solemnly": 26294, - "##orno": 26295, - "repairing": 26296, - "amore": 26297, - "groceries": 26298, - "ultraviolet": 26299, - "##chase": 26300, - "schoolhouse": 26301, - "##tua": 26302, - "resurgence": 26303, - "nailed": 26304, - "##otype": 26305, - "##×": 26306, - "ruse": 26307, - "saliva": 26308, - "diagrams": 26309, - "##tructing": 26310, - "albans": 26311, - "rann": 26312, - "thirties": 26313, - "1b": 26314, - "antennas": 26315, - "hilarious": 26316, - "cougars": 26317, - "paddington": 26318, - "stats": 26319, - "##eger": 26320, - "breakaway": 26321, - "ipod": 26322, - "reza": 26323, - "authorship": 26324, - "prohibiting": 26325, - "scoffed": 26326, - "##etz": 26327, - "##ttle": 26328, - "conscription": 26329, - "defected": 26330, - "trondheim": 26331, - "##fires": 26332, - "ivanov": 26333, - "keenan": 26334, - "##adan": 26335, - "##ciful": 26336, - "##fb": 26337, - "##slow": 26338, - "locating": 26339, - "##ials": 26340, - "##tford": 26341, - "cadiz": 26342, - "basalt": 26343, - "blankly": 26344, - "interned": 26345, - "rags": 26346, - "rattling": 26347, - "##tick": 26348, - "carpathian": 26349, - "reassured": 26350, - "sync": 26351, - "bum": 26352, - "guildford": 26353, - "iss": 26354, - "staunch": 26355, - "##onga": 26356, - "astronomers": 26357, - "sera": 26358, - "sofie": 26359, - "emergencies": 26360, - "susquehanna": 26361, - "##heard": 26362, - "duc": 26363, - "mastery": 26364, - "vh1": 26365, - "williamsburg": 26366, - "bayer": 26367, - "buckled": 26368, - "craving": 26369, - "##khan": 26370, - "##rdes": 26371, - "bloomington": 26372, - "##write": 26373, - "alton": 26374, - "barbecue": 26375, - "##bians": 26376, - "justine": 26377, - "##hri": 26378, - "##ndt": 26379, - "delightful": 26380, - "smartphone": 26381, - "newtown": 26382, - "photon": 26383, - "retrieval": 26384, - "peugeot": 26385, - "hissing": 26386, - "##monium": 26387, - "##orough": 26388, - "flavors": 26389, - "lighted": 26390, - "relaunched": 26391, - "tainted": 26392, - "##games": 26393, - "##lysis": 26394, - "anarchy": 26395, - "microscopic": 26396, - "hopping": 26397, - "adept": 26398, - "evade": 26399, - "evie": 26400, - "##beau": 26401, - "inhibit": 
26402, - "sinn": 26403, - "adjustable": 26404, - "hurst": 26405, - "intuition": 26406, - "wilton": 26407, - "cisco": 26408, - "44th": 26409, - "lawful": 26410, - "lowlands": 26411, - "stockings": 26412, - "thierry": 26413, - "##dalen": 26414, - "##hila": 26415, - "##nai": 26416, - "fates": 26417, - "prank": 26418, - "tb": 26419, - "maison": 26420, - "lobbied": 26421, - "provocative": 26422, - "1724": 26423, - "4a": 26424, - "utopia": 26425, - "##qual": 26426, - "carbonate": 26427, - "gujarati": 26428, - "purcell": 26429, - "##rford": 26430, - "curtiss": 26431, - "##mei": 26432, - "overgrown": 26433, - "arenas": 26434, - "mediation": 26435, - "swallows": 26436, - "##rnik": 26437, - "respectful": 26438, - "turnbull": 26439, - "##hedron": 26440, - "##hope": 26441, - "alyssa": 26442, - "ozone": 26443, - "##ʻi": 26444, - "ami": 26445, - "gestapo": 26446, - "johansson": 26447, - "snooker": 26448, - "canteen": 26449, - "cuff": 26450, - "declines": 26451, - "empathy": 26452, - "stigma": 26453, - "##ags": 26454, - "##iner": 26455, - "##raine": 26456, - "taxpayers": 26457, - "gui": 26458, - "volga": 26459, - "##wright": 26460, - "##copic": 26461, - "lifespan": 26462, - "overcame": 26463, - "tattooed": 26464, - "enactment": 26465, - "giggles": 26466, - "##ador": 26467, - "##camp": 26468, - "barrington": 26469, - "bribe": 26470, - "obligatory": 26471, - "orbiting": 26472, - "peng": 26473, - "##enas": 26474, - "elusive": 26475, - "sucker": 26476, - "##vating": 26477, - "cong": 26478, - "hardship": 26479, - "empowered": 26480, - "anticipating": 26481, - "estrada": 26482, - "cryptic": 26483, - "greasy": 26484, - "detainees": 26485, - "planck": 26486, - "sudbury": 26487, - "plaid": 26488, - "dod": 26489, - "marriott": 26490, - "kayla": 26491, - "##ears": 26492, - "##vb": 26493, - "##zd": 26494, - "mortally": 26495, - "##hein": 26496, - "cognition": 26497, - "radha": 26498, - "319": 26499, - "liechtenstein": 26500, - "meade": 26501, - "richly": 26502, - "argyle": 26503, - "harpsichord": 26504, - "liberalism": 26505, - "trumpets": 26506, - "lauded": 26507, - "tyrant": 26508, - "salsa": 26509, - "tiled": 26510, - "lear": 26511, - "promoters": 26512, - "reused": 26513, - "slicing": 26514, - "trident": 26515, - "##chuk": 26516, - "##gami": 26517, - "##lka": 26518, - "cantor": 26519, - "checkpoint": 26520, - "##points": 26521, - "gaul": 26522, - "leger": 26523, - "mammalian": 26524, - "##tov": 26525, - "##aar": 26526, - "##schaft": 26527, - "doha": 26528, - "frenchman": 26529, - "nirvana": 26530, - "##vino": 26531, - "delgado": 26532, - "headlining": 26533, - "##eron": 26534, - "##iography": 26535, - "jug": 26536, - "tko": 26537, - "1649": 26538, - "naga": 26539, - "intersections": 26540, - "##jia": 26541, - "benfica": 26542, - "nawab": 26543, - "##suka": 26544, - "ashford": 26545, - "gulp": 26546, - "##deck": 26547, - "##vill": 26548, - "##rug": 26549, - "brentford": 26550, - "frazier": 26551, - "pleasures": 26552, - "dunne": 26553, - "potsdam": 26554, - "shenzhen": 26555, - "dentistry": 26556, - "##tec": 26557, - "flanagan": 26558, - "##dorff": 26559, - "##hear": 26560, - "chorale": 26561, - "dinah": 26562, - "prem": 26563, - "quezon": 26564, - "##rogated": 26565, - "relinquished": 26566, - "sutra": 26567, - "terri": 26568, - "##pani": 26569, - "flaps": 26570, - "##rissa": 26571, - "poly": 26572, - "##rnet": 26573, - "homme": 26574, - "aback": 26575, - "##eki": 26576, - "linger": 26577, - "womb": 26578, - "##kson": 26579, - "##lewood": 26580, - "doorstep": 26581, - "orthodoxy": 26582, - "threaded": 26583, - 
"westfield": 26584, - "##rval": 26585, - "dioceses": 26586, - "fridays": 26587, - "subsided": 26588, - "##gata": 26589, - "loyalists": 26590, - "##biotic": 26591, - "##ettes": 26592, - "letterman": 26593, - "lunatic": 26594, - "prelate": 26595, - "tenderly": 26596, - "invariably": 26597, - "souza": 26598, - "thug": 26599, - "winslow": 26600, - "##otide": 26601, - "furlongs": 26602, - "gogh": 26603, - "jeopardy": 26604, - "##runa": 26605, - "pegasus": 26606, - "##umble": 26607, - "humiliated": 26608, - "standalone": 26609, - "tagged": 26610, - "##roller": 26611, - "freshmen": 26612, - "klan": 26613, - "##bright": 26614, - "attaining": 26615, - "initiating": 26616, - "transatlantic": 26617, - "logged": 26618, - "viz": 26619, - "##uance": 26620, - "1723": 26621, - "combatants": 26622, - "intervening": 26623, - "stephane": 26624, - "chieftain": 26625, - "despised": 26626, - "grazed": 26627, - "317": 26628, - "cdc": 26629, - "galveston": 26630, - "godzilla": 26631, - "macro": 26632, - "simulate": 26633, - "##planes": 26634, - "parades": 26635, - "##esses": 26636, - "960": 26637, - "##ductive": 26638, - "##unes": 26639, - "equator": 26640, - "overdose": 26641, - "##cans": 26642, - "##hosh": 26643, - "##lifting": 26644, - "joshi": 26645, - "epstein": 26646, - "sonora": 26647, - "treacherous": 26648, - "aquatics": 26649, - "manchu": 26650, - "responsive": 26651, - "##sation": 26652, - "supervisory": 26653, - "##christ": 26654, - "##llins": 26655, - "##ibar": 26656, - "##balance": 26657, - "##uso": 26658, - "kimball": 26659, - "karlsruhe": 26660, - "mab": 26661, - "##emy": 26662, - "ignores": 26663, - "phonetic": 26664, - "reuters": 26665, - "spaghetti": 26666, - "820": 26667, - "almighty": 26668, - "danzig": 26669, - "rumbling": 26670, - "tombstone": 26671, - "designations": 26672, - "lured": 26673, - "outset": 26674, - "##felt": 26675, - "supermarkets": 26676, - "##wt": 26677, - "grupo": 26678, - "kei": 26679, - "kraft": 26680, - "susanna": 26681, - "##blood": 26682, - "comprehension": 26683, - "genealogy": 26684, - "##aghan": 26685, - "##verted": 26686, - "redding": 26687, - "##ythe": 26688, - "1722": 26689, - "bowing": 26690, - "##pore": 26691, - "##roi": 26692, - "lest": 26693, - "sharpened": 26694, - "fulbright": 26695, - "valkyrie": 26696, - "sikhs": 26697, - "##unds": 26698, - "swans": 26699, - "bouquet": 26700, - "merritt": 26701, - "##tage": 26702, - "##venting": 26703, - "commuted": 26704, - "redhead": 26705, - "clerks": 26706, - "leasing": 26707, - "cesare": 26708, - "dea": 26709, - "hazy": 26710, - "##vances": 26711, - "fledged": 26712, - "greenfield": 26713, - "servicemen": 26714, - "##gical": 26715, - "armando": 26716, - "blackout": 26717, - "dt": 26718, - "sagged": 26719, - "downloadable": 26720, - "intra": 26721, - "potion": 26722, - "pods": 26723, - "##4th": 26724, - "##mism": 26725, - "xp": 26726, - "attendants": 26727, - "gambia": 26728, - "stale": 26729, - "##ntine": 26730, - "plump": 26731, - "asteroids": 26732, - "rediscovered": 26733, - "buds": 26734, - "flea": 26735, - "hive": 26736, - "##neas": 26737, - "1737": 26738, - "classifications": 26739, - "debuts": 26740, - "##eles": 26741, - "olympus": 26742, - "scala": 26743, - "##eurs": 26744, - "##gno": 26745, - "##mute": 26746, - "hummed": 26747, - "sigismund": 26748, - "visuals": 26749, - "wiggled": 26750, - "await": 26751, - "pilasters": 26752, - "clench": 26753, - "sulfate": 26754, - "##ances": 26755, - "bellevue": 26756, - "enigma": 26757, - "trainee": 26758, - "snort": 26759, - "##sw": 26760, - "clouded": 26761, - 
"denim": 26762, - "##rank": 26763, - "##rder": 26764, - "churning": 26765, - "hartman": 26766, - "lodges": 26767, - "riches": 26768, - "sima": 26769, - "##missible": 26770, - "accountable": 26771, - "socrates": 26772, - "regulates": 26773, - "mueller": 26774, - "##cr": 26775, - "1702": 26776, - "avoids": 26777, - "solids": 26778, - "himalayas": 26779, - "nutrient": 26780, - "pup": 26781, - "##jevic": 26782, - "squat": 26783, - "fades": 26784, - "nec": 26785, - "##lates": 26786, - "##pina": 26787, - "##rona": 26788, - "##ου": 26789, - "privateer": 26790, - "tequila": 26791, - "##gative": 26792, - "##mpton": 26793, - "apt": 26794, - "hornet": 26795, - "immortals": 26796, - "##dou": 26797, - "asturias": 26798, - "cleansing": 26799, - "dario": 26800, - "##rries": 26801, - "##anta": 26802, - "etymology": 26803, - "servicing": 26804, - "zhejiang": 26805, - "##venor": 26806, - "##nx": 26807, - "horned": 26808, - "erasmus": 26809, - "rayon": 26810, - "relocating": 26811, - "£10": 26812, - "##bags": 26813, - "escalated": 26814, - "promenade": 26815, - "stubble": 26816, - "2010s": 26817, - "artisans": 26818, - "axial": 26819, - "liquids": 26820, - "mora": 26821, - "sho": 26822, - "yoo": 26823, - "##tsky": 26824, - "bundles": 26825, - "oldies": 26826, - "##nally": 26827, - "notification": 26828, - "bastion": 26829, - "##ths": 26830, - "sparkle": 26831, - "##lved": 26832, - "1728": 26833, - "leash": 26834, - "pathogen": 26835, - "highs": 26836, - "##hmi": 26837, - "immature": 26838, - "880": 26839, - "gonzaga": 26840, - "ignatius": 26841, - "mansions": 26842, - "monterrey": 26843, - "sweets": 26844, - "bryson": 26845, - "##loe": 26846, - "polled": 26847, - "regatta": 26848, - "brightest": 26849, - "pei": 26850, - "rosy": 26851, - "squid": 26852, - "hatfield": 26853, - "payroll": 26854, - "addict": 26855, - "meath": 26856, - "cornerback": 26857, - "heaviest": 26858, - "lodging": 26859, - "##mage": 26860, - "capcom": 26861, - "rippled": 26862, - "##sily": 26863, - "barnet": 26864, - "mayhem": 26865, - "ymca": 26866, - "snuggled": 26867, - "rousseau": 26868, - "##cute": 26869, - "blanchard": 26870, - "284": 26871, - "fragmented": 26872, - "leighton": 26873, - "chromosomes": 26874, - "risking": 26875, - "##md": 26876, - "##strel": 26877, - "##utter": 26878, - "corinne": 26879, - "coyotes": 26880, - "cynical": 26881, - "hiroshi": 26882, - "yeomanry": 26883, - "##ractive": 26884, - "ebook": 26885, - "grading": 26886, - "mandela": 26887, - "plume": 26888, - "agustin": 26889, - "magdalene": 26890, - "##rkin": 26891, - "bea": 26892, - "femme": 26893, - "trafford": 26894, - "##coll": 26895, - "##lun": 26896, - "##tance": 26897, - "52nd": 26898, - "fourier": 26899, - "upton": 26900, - "##mental": 26901, - "camilla": 26902, - "gust": 26903, - "iihf": 26904, - "islamabad": 26905, - "longevity": 26906, - "##kala": 26907, - "feldman": 26908, - "netting": 26909, - "##rization": 26910, - "endeavour": 26911, - "foraging": 26912, - "mfa": 26913, - "orr": 26914, - "##open": 26915, - "greyish": 26916, - "contradiction": 26917, - "graz": 26918, - "##ruff": 26919, - "handicapped": 26920, - "marlene": 26921, - "tweed": 26922, - "oaxaca": 26923, - "spp": 26924, - "campos": 26925, - "miocene": 26926, - "pri": 26927, - "configured": 26928, - "cooks": 26929, - "pluto": 26930, - "cozy": 26931, - "pornographic": 26932, - "##entes": 26933, - "70th": 26934, - "fairness": 26935, - "glided": 26936, - "jonny": 26937, - "lynne": 26938, - "rounding": 26939, - "sired": 26940, - "##emon": 26941, - "##nist": 26942, - "remade": 26943, - 
"uncover": 26944, - "##mack": 26945, - "complied": 26946, - "lei": 26947, - "newsweek": 26948, - "##jured": 26949, - "##parts": 26950, - "##enting": 26951, - "##pg": 26952, - "293": 26953, - "finer": 26954, - "guerrillas": 26955, - "athenian": 26956, - "deng": 26957, - "disused": 26958, - "stepmother": 26959, - "accuse": 26960, - "gingerly": 26961, - "seduction": 26962, - "521": 26963, - "confronting": 26964, - "##walker": 26965, - "##going": 26966, - "gora": 26967, - "nostalgia": 26968, - "sabres": 26969, - "virginity": 26970, - "wrenched": 26971, - "##minated": 26972, - "syndication": 26973, - "wielding": 26974, - "eyre": 26975, - "##56": 26976, - "##gnon": 26977, - "##igny": 26978, - "behaved": 26979, - "taxpayer": 26980, - "sweeps": 26981, - "##growth": 26982, - "childless": 26983, - "gallant": 26984, - "##ywood": 26985, - "amplified": 26986, - "geraldine": 26987, - "scrape": 26988, - "##ffi": 26989, - "babylonian": 26990, - "fresco": 26991, - "##rdan": 26992, - "##kney": 26993, - "##position": 26994, - "1718": 26995, - "restricting": 26996, - "tack": 26997, - "fukuoka": 26998, - "osborn": 26999, - "selector": 27000, - "partnering": 27001, - "##dlow": 27002, - "318": 27003, - "gnu": 27004, - "kia": 27005, - "tak": 27006, - "whitley": 27007, - "gables": 27008, - "##54": 27009, - "##mania": 27010, - "mri": 27011, - "softness": 27012, - "immersion": 27013, - "##bots": 27014, - "##evsky": 27015, - "1713": 27016, - "chilling": 27017, - "insignificant": 27018, - "pcs": 27019, - "##uis": 27020, - "elites": 27021, - "lina": 27022, - "purported": 27023, - "supplemental": 27024, - "teaming": 27025, - "##americana": 27026, - "##dding": 27027, - "##inton": 27028, - "proficient": 27029, - "rouen": 27030, - "##nage": 27031, - "##rret": 27032, - "niccolo": 27033, - "selects": 27034, - "##bread": 27035, - "fluffy": 27036, - "1621": 27037, - "gruff": 27038, - "knotted": 27039, - "mukherjee": 27040, - "polgara": 27041, - "thrash": 27042, - "nicholls": 27043, - "secluded": 27044, - "smoothing": 27045, - "thru": 27046, - "corsica": 27047, - "loaf": 27048, - "whitaker": 27049, - "inquiries": 27050, - "##rrier": 27051, - "##kam": 27052, - "indochina": 27053, - "289": 27054, - "marlins": 27055, - "myles": 27056, - "peking": 27057, - "##tea": 27058, - "extracts": 27059, - "pastry": 27060, - "superhuman": 27061, - "connacht": 27062, - "vogel": 27063, - "##ditional": 27064, - "##het": 27065, - "##udged": 27066, - "##lash": 27067, - "gloss": 27068, - "quarries": 27069, - "refit": 27070, - "teaser": 27071, - "##alic": 27072, - "##gaon": 27073, - "20s": 27074, - "materialized": 27075, - "sling": 27076, - "camped": 27077, - "pickering": 27078, - "tung": 27079, - "tracker": 27080, - "pursuant": 27081, - "##cide": 27082, - "cranes": 27083, - "soc": 27084, - "##cini": 27085, - "##typical": 27086, - "##viere": 27087, - "anhalt": 27088, - "overboard": 27089, - "workout": 27090, - "chores": 27091, - "fares": 27092, - "orphaned": 27093, - "stains": 27094, - "##logie": 27095, - "fenton": 27096, - "surpassing": 27097, - "joyah": 27098, - "triggers": 27099, - "##itte": 27100, - "grandmaster": 27101, - "##lass": 27102, - "##lists": 27103, - "clapping": 27104, - "fraudulent": 27105, - "ledger": 27106, - "nagasaki": 27107, - "##cor": 27108, - "##nosis": 27109, - "##tsa": 27110, - "eucalyptus": 27111, - "tun": 27112, - "##icio": 27113, - "##rney": 27114, - "##tara": 27115, - "dax": 27116, - "heroism": 27117, - "ina": 27118, - "wrexham": 27119, - "onboard": 27120, - "unsigned": 27121, - "##dates": 27122, - "moshe": 27123, - 
"galley": 27124, - "winnie": 27125, - "droplets": 27126, - "exiles": 27127, - "praises": 27128, - "watered": 27129, - "noodles": 27130, - "##aia": 27131, - "fein": 27132, - "adi": 27133, - "leland": 27134, - "multicultural": 27135, - "stink": 27136, - "bingo": 27137, - "comets": 27138, - "erskine": 27139, - "modernized": 27140, - "canned": 27141, - "constraint": 27142, - "domestically": 27143, - "chemotherapy": 27144, - "featherweight": 27145, - "stifled": 27146, - "##mum": 27147, - "darkly": 27148, - "irresistible": 27149, - "refreshing": 27150, - "hasty": 27151, - "isolate": 27152, - "##oys": 27153, - "kitchener": 27154, - "planners": 27155, - "##wehr": 27156, - "cages": 27157, - "yarn": 27158, - "implant": 27159, - "toulon": 27160, - "elects": 27161, - "childbirth": 27162, - "yue": 27163, - "##lind": 27164, - "##lone": 27165, - "cn": 27166, - "rightful": 27167, - "sportsman": 27168, - "junctions": 27169, - "remodeled": 27170, - "specifies": 27171, - "##rgh": 27172, - "291": 27173, - "##oons": 27174, - "complimented": 27175, - "##urgent": 27176, - "lister": 27177, - "ot": 27178, - "##logic": 27179, - "bequeathed": 27180, - "cheekbones": 27181, - "fontana": 27182, - "gabby": 27183, - "##dial": 27184, - "amadeus": 27185, - "corrugated": 27186, - "maverick": 27187, - "resented": 27188, - "triangles": 27189, - "##hered": 27190, - "##usly": 27191, - "nazareth": 27192, - "tyrol": 27193, - "1675": 27194, - "assent": 27195, - "poorer": 27196, - "sectional": 27197, - "aegean": 27198, - "##cous": 27199, - "296": 27200, - "nylon": 27201, - "ghanaian": 27202, - "##egorical": 27203, - "##weig": 27204, - "cushions": 27205, - "forbid": 27206, - "fusiliers": 27207, - "obstruction": 27208, - "somerville": 27209, - "##scia": 27210, - "dime": 27211, - "earrings": 27212, - "elliptical": 27213, - "leyte": 27214, - "oder": 27215, - "polymers": 27216, - "timmy": 27217, - "atm": 27218, - "midtown": 27219, - "piloted": 27220, - "settles": 27221, - "continual": 27222, - "externally": 27223, - "mayfield": 27224, - "##uh": 27225, - "enrichment": 27226, - "henson": 27227, - "keane": 27228, - "persians": 27229, - "1733": 27230, - "benji": 27231, - "braden": 27232, - "pep": 27233, - "324": 27234, - "##efe": 27235, - "contenders": 27236, - "pepsi": 27237, - "valet": 27238, - "##isches": 27239, - "298": 27240, - "##asse": 27241, - "##earing": 27242, - "goofy": 27243, - "stroll": 27244, - "##amen": 27245, - "authoritarian": 27246, - "occurrences": 27247, - "adversary": 27248, - "ahmedabad": 27249, - "tangent": 27250, - "toppled": 27251, - "dorchester": 27252, - "1672": 27253, - "modernism": 27254, - "marxism": 27255, - "islamist": 27256, - "charlemagne": 27257, - "exponential": 27258, - "racks": 27259, - "unicode": 27260, - "brunette": 27261, - "mbc": 27262, - "pic": 27263, - "skirmish": 27264, - "##bund": 27265, - "##lad": 27266, - "##powered": 27267, - "##yst": 27268, - "hoisted": 27269, - "messina": 27270, - "shatter": 27271, - "##ctum": 27272, - "jedi": 27273, - "vantage": 27274, - "##music": 27275, - "##neil": 27276, - "clemens": 27277, - "mahmoud": 27278, - "corrupted": 27279, - "authentication": 27280, - "lowry": 27281, - "nils": 27282, - "##washed": 27283, - "omnibus": 27284, - "wounding": 27285, - "jillian": 27286, - "##itors": 27287, - "##opped": 27288, - "serialized": 27289, - "narcotics": 27290, - "handheld": 27291, - "##arm": 27292, - "##plicity": 27293, - "intersecting": 27294, - "stimulating": 27295, - "##onis": 27296, - "crate": 27297, - "fellowships": 27298, - "hemingway": 27299, - "casinos": 27300, - 
"climatic": 27301, - "fordham": 27302, - "copeland": 27303, - "drip": 27304, - "beatty": 27305, - "leaflets": 27306, - "robber": 27307, - "brothel": 27308, - "madeira": 27309, - "##hedral": 27310, - "sphinx": 27311, - "ultrasound": 27312, - "##vana": 27313, - "valor": 27314, - "forbade": 27315, - "leonid": 27316, - "villas": 27317, - "##aldo": 27318, - "duane": 27319, - "marquez": 27320, - "##cytes": 27321, - "disadvantaged": 27322, - "forearms": 27323, - "kawasaki": 27324, - "reacts": 27325, - "consular": 27326, - "lax": 27327, - "uncles": 27328, - "uphold": 27329, - "##hopper": 27330, - "concepcion": 27331, - "dorsey": 27332, - "lass": 27333, - "##izan": 27334, - "arching": 27335, - "passageway": 27336, - "1708": 27337, - "researches": 27338, - "tia": 27339, - "internationals": 27340, - "##graphs": 27341, - "##opers": 27342, - "distinguishes": 27343, - "javanese": 27344, - "divert": 27345, - "##uven": 27346, - "plotted": 27347, - "##listic": 27348, - "##rwin": 27349, - "##erik": 27350, - "##tify": 27351, - "affirmative": 27352, - "signifies": 27353, - "validation": 27354, - "##bson": 27355, - "kari": 27356, - "felicity": 27357, - "georgina": 27358, - "zulu": 27359, - "##eros": 27360, - "##rained": 27361, - "##rath": 27362, - "overcoming": 27363, - "##dot": 27364, - "argyll": 27365, - "##rbin": 27366, - "1734": 27367, - "chiba": 27368, - "ratification": 27369, - "windy": 27370, - "earls": 27371, - "parapet": 27372, - "##marks": 27373, - "hunan": 27374, - "pristine": 27375, - "astrid": 27376, - "punta": 27377, - "##gart": 27378, - "brodie": 27379, - "##kota": 27380, - "##oder": 27381, - "malaga": 27382, - "minerva": 27383, - "rouse": 27384, - "##phonic": 27385, - "bellowed": 27386, - "pagoda": 27387, - "portals": 27388, - "reclamation": 27389, - "##gur": 27390, - "##odies": 27391, - "##⁄₄": 27392, - "parentheses": 27393, - "quoting": 27394, - "allergic": 27395, - "palette": 27396, - "showcases": 27397, - "benefactor": 27398, - "heartland": 27399, - "nonlinear": 27400, - "##tness": 27401, - "bladed": 27402, - "cheerfully": 27403, - "scans": 27404, - "##ety": 27405, - "##hone": 27406, - "1666": 27407, - "girlfriends": 27408, - "pedersen": 27409, - "hiram": 27410, - "sous": 27411, - "##liche": 27412, - "##nator": 27413, - "1683": 27414, - "##nery": 27415, - "##orio": 27416, - "##umen": 27417, - "bobo": 27418, - "primaries": 27419, - "smiley": 27420, - "##cb": 27421, - "unearthed": 27422, - "uniformly": 27423, - "fis": 27424, - "metadata": 27425, - "1635": 27426, - "ind": 27427, - "##oted": 27428, - "recoil": 27429, - "##titles": 27430, - "##tura": 27431, - "##ια": 27432, - "406": 27433, - "hilbert": 27434, - "jamestown": 27435, - "mcmillan": 27436, - "tulane": 27437, - "seychelles": 27438, - "##frid": 27439, - "antics": 27440, - "coli": 27441, - "fated": 27442, - "stucco": 27443, - "##grants": 27444, - "1654": 27445, - "bulky": 27446, - "accolades": 27447, - "arrays": 27448, - "caledonian": 27449, - "carnage": 27450, - "optimism": 27451, - "puebla": 27452, - "##tative": 27453, - "##cave": 27454, - "enforcing": 27455, - "rotherham": 27456, - "seo": 27457, - "dunlop": 27458, - "aeronautics": 27459, - "chimed": 27460, - "incline": 27461, - "zoning": 27462, - "archduke": 27463, - "hellenistic": 27464, - "##oses": 27465, - "##sions": 27466, - "candi": 27467, - "thong": 27468, - "##ople": 27469, - "magnate": 27470, - "rustic": 27471, - "##rsk": 27472, - "projective": 27473, - "slant": 27474, - "##offs": 27475, - "danes": 27476, - "hollis": 27477, - "vocalists": 27478, - "##ammed": 27479, - 
"congenital": 27480, - "contend": 27481, - "gesellschaft": 27482, - "##ocating": 27483, - "##pressive": 27484, - "douglass": 27485, - "quieter": 27486, - "##cm": 27487, - "##kshi": 27488, - "howled": 27489, - "salim": 27490, - "spontaneously": 27491, - "townsville": 27492, - "buena": 27493, - "southport": 27494, - "##bold": 27495, - "kato": 27496, - "1638": 27497, - "faerie": 27498, - "stiffly": 27499, - "##vus": 27500, - "##rled": 27501, - "297": 27502, - "flawless": 27503, - "realising": 27504, - "taboo": 27505, - "##7th": 27506, - "bytes": 27507, - "straightening": 27508, - "356": 27509, - "jena": 27510, - "##hid": 27511, - "##rmin": 27512, - "cartwright": 27513, - "berber": 27514, - "bertram": 27515, - "soloists": 27516, - "411": 27517, - "noses": 27518, - "417": 27519, - "coping": 27520, - "fission": 27521, - "hardin": 27522, - "inca": 27523, - "##cen": 27524, - "1717": 27525, - "mobilized": 27526, - "vhf": 27527, - "##raf": 27528, - "biscuits": 27529, - "curate": 27530, - "##85": 27531, - "##anial": 27532, - "331": 27533, - "gaunt": 27534, - "neighbourhoods": 27535, - "1540": 27536, - "##abas": 27537, - "blanca": 27538, - "bypassed": 27539, - "sockets": 27540, - "behold": 27541, - "coincidentally": 27542, - "##bane": 27543, - "nara": 27544, - "shave": 27545, - "splinter": 27546, - "terrific": 27547, - "##arion": 27548, - "##erian": 27549, - "commonplace": 27550, - "juris": 27551, - "redwood": 27552, - "waistband": 27553, - "boxed": 27554, - "caitlin": 27555, - "fingerprints": 27556, - "jennie": 27557, - "naturalized": 27558, - "##ired": 27559, - "balfour": 27560, - "craters": 27561, - "jody": 27562, - "bungalow": 27563, - "hugely": 27564, - "quilt": 27565, - "glitter": 27566, - "pigeons": 27567, - "undertaker": 27568, - "bulging": 27569, - "constrained": 27570, - "goo": 27571, - "##sil": 27572, - "##akh": 27573, - "assimilation": 27574, - "reworked": 27575, - "##person": 27576, - "persuasion": 27577, - "##pants": 27578, - "felicia": 27579, - "##cliff": 27580, - "##ulent": 27581, - "1732": 27582, - "explodes": 27583, - "##dun": 27584, - "##inium": 27585, - "##zic": 27586, - "lyman": 27587, - "vulture": 27588, - "hog": 27589, - "overlook": 27590, - "begs": 27591, - "northwards": 27592, - "ow": 27593, - "spoil": 27594, - "##urer": 27595, - "fatima": 27596, - "favorably": 27597, - "accumulate": 27598, - "sargent": 27599, - "sorority": 27600, - "corresponded": 27601, - "dispersal": 27602, - "kochi": 27603, - "toned": 27604, - "##imi": 27605, - "##lita": 27606, - "internacional": 27607, - "newfound": 27608, - "##agger": 27609, - "##lynn": 27610, - "##rigue": 27611, - "booths": 27612, - "peanuts": 27613, - "##eborg": 27614, - "medicare": 27615, - "muriel": 27616, - "nur": 27617, - "##uram": 27618, - "crates": 27619, - "millennia": 27620, - "pajamas": 27621, - "worsened": 27622, - "##breakers": 27623, - "jimi": 27624, - "vanuatu": 27625, - "yawned": 27626, - "##udeau": 27627, - "carousel": 27628, - "##hony": 27629, - "hurdle": 27630, - "##ccus": 27631, - "##mounted": 27632, - "##pod": 27633, - "rv": 27634, - "##eche": 27635, - "airship": 27636, - "ambiguity": 27637, - "compulsion": 27638, - "recapture": 27639, - "##claiming": 27640, - "arthritis": 27641, - "##osomal": 27642, - "1667": 27643, - "asserting": 27644, - "ngc": 27645, - "sniffing": 27646, - "dade": 27647, - "discontent": 27648, - "glendale": 27649, - "ported": 27650, - "##amina": 27651, - "defamation": 27652, - "rammed": 27653, - "##scent": 27654, - "fling": 27655, - "livingstone": 27656, - "##fleet": 27657, - "875": 27658, - 
"##ppy": 27659, - "apocalyptic": 27660, - "comrade": 27661, - "lcd": 27662, - "##lowe": 27663, - "cessna": 27664, - "eine": 27665, - "persecuted": 27666, - "subsistence": 27667, - "demi": 27668, - "hoop": 27669, - "reliefs": 27670, - "710": 27671, - "coptic": 27672, - "progressing": 27673, - "stemmed": 27674, - "perpetrators": 27675, - "1665": 27676, - "priestess": 27677, - "##nio": 27678, - "dobson": 27679, - "ebony": 27680, - "rooster": 27681, - "itf": 27682, - "tortricidae": 27683, - "##bbon": 27684, - "##jian": 27685, - "cleanup": 27686, - "##jean": 27687, - "##øy": 27688, - "1721": 27689, - "eighties": 27690, - "taxonomic": 27691, - "holiness": 27692, - "##hearted": 27693, - "##spar": 27694, - "antilles": 27695, - "showcasing": 27696, - "stabilized": 27697, - "##nb": 27698, - "gia": 27699, - "mascara": 27700, - "michelangelo": 27701, - "dawned": 27702, - "##uria": 27703, - "##vinsky": 27704, - "extinguished": 27705, - "fitz": 27706, - "grotesque": 27707, - "£100": 27708, - "##fera": 27709, - "##loid": 27710, - "##mous": 27711, - "barges": 27712, - "neue": 27713, - "throbbed": 27714, - "cipher": 27715, - "johnnie": 27716, - "##a1": 27717, - "##mpt": 27718, - "outburst": 27719, - "##swick": 27720, - "spearheaded": 27721, - "administrations": 27722, - "c1": 27723, - "heartbreak": 27724, - "pixels": 27725, - "pleasantly": 27726, - "##enay": 27727, - "lombardy": 27728, - "plush": 27729, - "##nsed": 27730, - "bobbie": 27731, - "##hly": 27732, - "reapers": 27733, - "tremor": 27734, - "xiang": 27735, - "minogue": 27736, - "substantive": 27737, - "hitch": 27738, - "barak": 27739, - "##wyl": 27740, - "kwan": 27741, - "##encia": 27742, - "910": 27743, - "obscene": 27744, - "elegance": 27745, - "indus": 27746, - "surfer": 27747, - "bribery": 27748, - "conserve": 27749, - "##hyllum": 27750, - "##masters": 27751, - "horatio": 27752, - "##fat": 27753, - "apes": 27754, - "rebound": 27755, - "psychotic": 27756, - "##pour": 27757, - "iteration": 27758, - "##mium": 27759, - "##vani": 27760, - "botanic": 27761, - "horribly": 27762, - "antiques": 27763, - "dispose": 27764, - "paxton": 27765, - "##hli": 27766, - "##wg": 27767, - "timeless": 27768, - "1704": 27769, - "disregard": 27770, - "engraver": 27771, - "hounds": 27772, - "##bau": 27773, - "##version": 27774, - "looted": 27775, - "uno": 27776, - "facilitates": 27777, - "groans": 27778, - "masjid": 27779, - "rutland": 27780, - "antibody": 27781, - "disqualification": 27782, - "decatur": 27783, - "footballers": 27784, - "quake": 27785, - "slacks": 27786, - "48th": 27787, - "rein": 27788, - "scribe": 27789, - "stabilize": 27790, - "commits": 27791, - "exemplary": 27792, - "tho": 27793, - "##hort": 27794, - "##chison": 27795, - "pantry": 27796, - "traversed": 27797, - "##hiti": 27798, - "disrepair": 27799, - "identifiable": 27800, - "vibrated": 27801, - "baccalaureate": 27802, - "##nnis": 27803, - "csa": 27804, - "interviewing": 27805, - "##iensis": 27806, - "##raße": 27807, - "greaves": 27808, - "wealthiest": 27809, - "343": 27810, - "classed": 27811, - "jogged": 27812, - "£5": 27813, - "##58": 27814, - "##atal": 27815, - "illuminating": 27816, - "knicks": 27817, - "respecting": 27818, - "##uno": 27819, - "scrubbed": 27820, - "##iji": 27821, - "##dles": 27822, - "kruger": 27823, - "moods": 27824, - "growls": 27825, - "raider": 27826, - "silvia": 27827, - "chefs": 27828, - "kam": 27829, - "vr": 27830, - "cree": 27831, - "percival": 27832, - "##terol": 27833, - "gunter": 27834, - "counterattack": 27835, - "defiant": 27836, - "henan": 27837, - "ze": 27838, 
- "##rasia": 27839, - "##riety": 27840, - "equivalence": 27841, - "submissions": 27842, - "##fra": 27843, - "##thor": 27844, - "bautista": 27845, - "mechanically": 27846, - "##heater": 27847, - "cornice": 27848, - "herbal": 27849, - "templar": 27850, - "##mering": 27851, - "outputs": 27852, - "ruining": 27853, - "ligand": 27854, - "renumbered": 27855, - "extravagant": 27856, - "mika": 27857, - "blockbuster": 27858, - "eta": 27859, - "insurrection": 27860, - "##ilia": 27861, - "darkening": 27862, - "ferocious": 27863, - "pianos": 27864, - "strife": 27865, - "kinship": 27866, - "##aer": 27867, - "melee": 27868, - "##anor": 27869, - "##iste": 27870, - "##may": 27871, - "##oue": 27872, - "decidedly": 27873, - "weep": 27874, - "##jad": 27875, - "##missive": 27876, - "##ppel": 27877, - "354": 27878, - "puget": 27879, - "unease": 27880, - "##gnant": 27881, - "1629": 27882, - "hammering": 27883, - "kassel": 27884, - "ob": 27885, - "wessex": 27886, - "##lga": 27887, - "bromwich": 27888, - "egan": 27889, - "paranoia": 27890, - "utilization": 27891, - "##atable": 27892, - "##idad": 27893, - "contradictory": 27894, - "provoke": 27895, - "##ols": 27896, - "##ouring": 27897, - "##tangled": 27898, - "knesset": 27899, - "##very": 27900, - "##lette": 27901, - "plumbing": 27902, - "##sden": 27903, - "##¹": 27904, - "greensboro": 27905, - "occult": 27906, - "sniff": 27907, - "338": 27908, - "zev": 27909, - "beaming": 27910, - "gamer": 27911, - "haggard": 27912, - "mahal": 27913, - "##olt": 27914, - "##pins": 27915, - "mendes": 27916, - "utmost": 27917, - "briefing": 27918, - "gunnery": 27919, - "##gut": 27920, - "##pher": 27921, - "##zh": 27922, - "##rok": 27923, - "1679": 27924, - "khalifa": 27925, - "sonya": 27926, - "##boot": 27927, - "principals": 27928, - "urbana": 27929, - "wiring": 27930, - "##liffe": 27931, - "##minating": 27932, - "##rrado": 27933, - "dahl": 27934, - "nyu": 27935, - "skepticism": 27936, - "np": 27937, - "townspeople": 27938, - "ithaca": 27939, - "lobster": 27940, - "somethin": 27941, - "##fur": 27942, - "##arina": 27943, - "##−1": 27944, - "freighter": 27945, - "zimmerman": 27946, - "biceps": 27947, - "contractual": 27948, - "##herton": 27949, - "amend": 27950, - "hurrying": 27951, - "subconscious": 27952, - "##anal": 27953, - "336": 27954, - "meng": 27955, - "clermont": 27956, - "spawning": 27957, - "##eia": 27958, - "##lub": 27959, - "dignitaries": 27960, - "impetus": 27961, - "snacks": 27962, - "spotting": 27963, - "twigs": 27964, - "##bilis": 27965, - "##cz": 27966, - "##ouk": 27967, - "libertadores": 27968, - "nic": 27969, - "skylar": 27970, - "##aina": 27971, - "##firm": 27972, - "gustave": 27973, - "asean": 27974, - "##anum": 27975, - "dieter": 27976, - "legislatures": 27977, - "flirt": 27978, - "bromley": 27979, - "trolls": 27980, - "umar": 27981, - "##bbies": 27982, - "##tyle": 27983, - "blah": 27984, - "parc": 27985, - "bridgeport": 27986, - "crank": 27987, - "negligence": 27988, - "##nction": 27989, - "46th": 27990, - "constantin": 27991, - "molded": 27992, - "bandages": 27993, - "seriousness": 27994, - "00pm": 27995, - "siegel": 27996, - "carpets": 27997, - "compartments": 27998, - "upbeat": 27999, - "statehood": 28000, - "##dner": 28001, - "##edging": 28002, - "marko": 28003, - "730": 28004, - "platt": 28005, - "##hane": 28006, - "paving": 28007, - "##iy": 28008, - "1738": 28009, - "abbess": 28010, - "impatience": 28011, - "limousine": 28012, - "nbl": 28013, - "##talk": 28014, - "441": 28015, - "lucille": 28016, - "mojo": 28017, - "nightfall": 28018, - "robbers": 28019, 
- "##nais": 28020, - "karel": 28021, - "brisk": 28022, - "calves": 28023, - "replicate": 28024, - "ascribed": 28025, - "telescopes": 28026, - "##olf": 28027, - "intimidated": 28028, - "##reen": 28029, - "ballast": 28030, - "specialization": 28031, - "##sit": 28032, - "aerodynamic": 28033, - "caliphate": 28034, - "rainer": 28035, - "visionary": 28036, - "##arded": 28037, - "epsilon": 28038, - "##aday": 28039, - "##onte": 28040, - "aggregation": 28041, - "auditory": 28042, - "boosted": 28043, - "reunification": 28044, - "kathmandu": 28045, - "loco": 28046, - "robyn": 28047, - "402": 28048, - "acknowledges": 28049, - "appointing": 28050, - "humanoid": 28051, - "newell": 28052, - "redeveloped": 28053, - "restraints": 28054, - "##tained": 28055, - "barbarians": 28056, - "chopper": 28057, - "1609": 28058, - "italiana": 28059, - "##lez": 28060, - "##lho": 28061, - "investigates": 28062, - "wrestlemania": 28063, - "##anies": 28064, - "##bib": 28065, - "690": 28066, - "##falls": 28067, - "creaked": 28068, - "dragoons": 28069, - "gravely": 28070, - "minions": 28071, - "stupidity": 28072, - "volley": 28073, - "##harat": 28074, - "##week": 28075, - "musik": 28076, - "##eries": 28077, - "##uously": 28078, - "fungal": 28079, - "massimo": 28080, - "semantics": 28081, - "malvern": 28082, - "##ahl": 28083, - "##pee": 28084, - "discourage": 28085, - "embryo": 28086, - "imperialism": 28087, - "1910s": 28088, - "profoundly": 28089, - "##ddled": 28090, - "jiangsu": 28091, - "sparkled": 28092, - "stat": 28093, - "##holz": 28094, - "sweatshirt": 28095, - "tobin": 28096, - "##iction": 28097, - "sneered": 28098, - "##cheon": 28099, - "##oit": 28100, - "brit": 28101, - "causal": 28102, - "smyth": 28103, - "##neuve": 28104, - "diffuse": 28105, - "perrin": 28106, - "silvio": 28107, - "##ipes": 28108, - "##recht": 28109, - "detonated": 28110, - "iqbal": 28111, - "selma": 28112, - "##nism": 28113, - "##zumi": 28114, - "roasted": 28115, - "##riders": 28116, - "tay": 28117, - "##ados": 28118, - "##mament": 28119, - "##mut": 28120, - "##rud": 28121, - "840": 28122, - "completes": 28123, - "nipples": 28124, - "cfa": 28125, - "flavour": 28126, - "hirsch": 28127, - "##laus": 28128, - "calderon": 28129, - "sneakers": 28130, - "moravian": 28131, - "##ksha": 28132, - "1622": 28133, - "rq": 28134, - "294": 28135, - "##imeters": 28136, - "bodo": 28137, - "##isance": 28138, - "##pre": 28139, - "##ronia": 28140, - "anatomical": 28141, - "excerpt": 28142, - "##lke": 28143, - "dh": 28144, - "kunst": 28145, - "##tablished": 28146, - "##scoe": 28147, - "biomass": 28148, - "panted": 28149, - "unharmed": 28150, - "gael": 28151, - "housemates": 28152, - "montpellier": 28153, - "##59": 28154, - "coa": 28155, - "rodents": 28156, - "tonic": 28157, - "hickory": 28158, - "singleton": 28159, - "##taro": 28160, - "451": 28161, - "1719": 28162, - "aldo": 28163, - "breaststroke": 28164, - "dempsey": 28165, - "och": 28166, - "rocco": 28167, - "##cuit": 28168, - "merton": 28169, - "dissemination": 28170, - "midsummer": 28171, - "serials": 28172, - "##idi": 28173, - "haji": 28174, - "polynomials": 28175, - "##rdon": 28176, - "gs": 28177, - "enoch": 28178, - "prematurely": 28179, - "shutter": 28180, - "taunton": 28181, - "£3": 28182, - "##grating": 28183, - "##inates": 28184, - "archangel": 28185, - "harassed": 28186, - "##asco": 28187, - "326": 28188, - "archway": 28189, - "dazzling": 28190, - "##ecin": 28191, - "1736": 28192, - "sumo": 28193, - "wat": 28194, - "##kovich": 28195, - "1086": 28196, - "honneur": 28197, - "##ently": 28198, - "##nostic": 
28199, - "##ttal": 28200, - "##idon": 28201, - "1605": 28202, - "403": 28203, - "1716": 28204, - "blogger": 28205, - "rents": 28206, - "##gnan": 28207, - "hires": 28208, - "##ikh": 28209, - "##dant": 28210, - "howie": 28211, - "##rons": 28212, - "handler": 28213, - "retracted": 28214, - "shocks": 28215, - "1632": 28216, - "arun": 28217, - "duluth": 28218, - "kepler": 28219, - "trumpeter": 28220, - "##lary": 28221, - "peeking": 28222, - "seasoned": 28223, - "trooper": 28224, - "##mara": 28225, - "laszlo": 28226, - "##iciencies": 28227, - "##rti": 28228, - "heterosexual": 28229, - "##inatory": 28230, - "##ssion": 28231, - "indira": 28232, - "jogging": 28233, - "##inga": 28234, - "##lism": 28235, - "beit": 28236, - "dissatisfaction": 28237, - "malice": 28238, - "##ately": 28239, - "nedra": 28240, - "peeling": 28241, - "##rgeon": 28242, - "47th": 28243, - "stadiums": 28244, - "475": 28245, - "vertigo": 28246, - "##ains": 28247, - "iced": 28248, - "restroom": 28249, - "##plify": 28250, - "##tub": 28251, - "illustrating": 28252, - "pear": 28253, - "##chner": 28254, - "##sibility": 28255, - "inorganic": 28256, - "rappers": 28257, - "receipts": 28258, - "watery": 28259, - "##kura": 28260, - "lucinda": 28261, - "##oulos": 28262, - "reintroduced": 28263, - "##8th": 28264, - "##tched": 28265, - "gracefully": 28266, - "saxons": 28267, - "nutritional": 28268, - "wastewater": 28269, - "rained": 28270, - "favourites": 28271, - "bedrock": 28272, - "fisted": 28273, - "hallways": 28274, - "likeness": 28275, - "upscale": 28276, - "##lateral": 28277, - "1580": 28278, - "blinds": 28279, - "prequel": 28280, - "##pps": 28281, - "##tama": 28282, - "deter": 28283, - "humiliating": 28284, - "restraining": 28285, - "tn": 28286, - "vents": 28287, - "1659": 28288, - "laundering": 28289, - "recess": 28290, - "rosary": 28291, - "tractors": 28292, - "coulter": 28293, - "federer": 28294, - "##ifiers": 28295, - "##plin": 28296, - "persistence": 28297, - "##quitable": 28298, - "geschichte": 28299, - "pendulum": 28300, - "quakers": 28301, - "##beam": 28302, - "bassett": 28303, - "pictorial": 28304, - "buffet": 28305, - "koln": 28306, - "##sitor": 28307, - "drills": 28308, - "reciprocal": 28309, - "shooters": 28310, - "##57": 28311, - "##cton": 28312, - "##tees": 28313, - "converge": 28314, - "pip": 28315, - "dmitri": 28316, - "donnelly": 28317, - "yamamoto": 28318, - "aqua": 28319, - "azores": 28320, - "demographics": 28321, - "hypnotic": 28322, - "spitfire": 28323, - "suspend": 28324, - "wryly": 28325, - "roderick": 28326, - "##rran": 28327, - "sebastien": 28328, - "##asurable": 28329, - "mavericks": 28330, - "##fles": 28331, - "##200": 28332, - "himalayan": 28333, - "prodigy": 28334, - "##iance": 28335, - "transvaal": 28336, - "demonstrators": 28337, - "handcuffs": 28338, - "dodged": 28339, - "mcnamara": 28340, - "sublime": 28341, - "1726": 28342, - "crazed": 28343, - "##efined": 28344, - "##till": 28345, - "ivo": 28346, - "pondered": 28347, - "reconciled": 28348, - "shrill": 28349, - "sava": 28350, - "##duk": 28351, - "bal": 28352, - "cad": 28353, - "heresy": 28354, - "jaipur": 28355, - "goran": 28356, - "##nished": 28357, - "341": 28358, - "lux": 28359, - "shelly": 28360, - "whitehall": 28361, - "##hre": 28362, - "israelis": 28363, - "peacekeeping": 28364, - "##wled": 28365, - "1703": 28366, - "demetrius": 28367, - "ousted": 28368, - "##arians": 28369, - "##zos": 28370, - "beale": 28371, - "anwar": 28372, - "backstroke": 28373, - "raged": 28374, - "shrinking": 28375, - "cremated": 28376, - "##yck": 28377, - "benign": 
28378, - "towing": 28379, - "wadi": 28380, - "darmstadt": 28381, - "landfill": 28382, - "parana": 28383, - "soothe": 28384, - "colleen": 28385, - "sidewalks": 28386, - "mayfair": 28387, - "tumble": 28388, - "hepatitis": 28389, - "ferrer": 28390, - "superstructure": 28391, - "##gingly": 28392, - "##urse": 28393, - "##wee": 28394, - "anthropological": 28395, - "translators": 28396, - "##mies": 28397, - "closeness": 28398, - "hooves": 28399, - "##pw": 28400, - "mondays": 28401, - "##roll": 28402, - "##vita": 28403, - "landscaping": 28404, - "##urized": 28405, - "purification": 28406, - "sock": 28407, - "thorns": 28408, - "thwarted": 28409, - "jalan": 28410, - "tiberius": 28411, - "##taka": 28412, - "saline": 28413, - "##rito": 28414, - "confidently": 28415, - "khyber": 28416, - "sculptors": 28417, - "##ij": 28418, - "brahms": 28419, - "hammersmith": 28420, - "inspectors": 28421, - "battista": 28422, - "fivb": 28423, - "fragmentation": 28424, - "hackney": 28425, - "##uls": 28426, - "arresting": 28427, - "exercising": 28428, - "antoinette": 28429, - "bedfordshire": 28430, - "##zily": 28431, - "dyed": 28432, - "##hema": 28433, - "1656": 28434, - "racetrack": 28435, - "variability": 28436, - "##tique": 28437, - "1655": 28438, - "austrians": 28439, - "deteriorating": 28440, - "madman": 28441, - "theorists": 28442, - "aix": 28443, - "lehman": 28444, - "weathered": 28445, - "1731": 28446, - "decreed": 28447, - "eruptions": 28448, - "1729": 28449, - "flaw": 28450, - "quinlan": 28451, - "sorbonne": 28452, - "flutes": 28453, - "nunez": 28454, - "1711": 28455, - "adored": 28456, - "downwards": 28457, - "fable": 28458, - "rasped": 28459, - "1712": 28460, - "moritz": 28461, - "mouthful": 28462, - "renegade": 28463, - "shivers": 28464, - "stunts": 28465, - "dysfunction": 28466, - "restrain": 28467, - "translit": 28468, - "327": 28469, - "pancakes": 28470, - "##avio": 28471, - "##cision": 28472, - "##tray": 28473, - "351": 28474, - "vial": 28475, - "##lden": 28476, - "bain": 28477, - "##maid": 28478, - "##oxide": 28479, - "chihuahua": 28480, - "malacca": 28481, - "vimes": 28482, - "##rba": 28483, - "##rnier": 28484, - "1664": 28485, - "donnie": 28486, - "plaques": 28487, - "##ually": 28488, - "337": 28489, - "bangs": 28490, - "floppy": 28491, - "huntsville": 28492, - "loretta": 28493, - "nikolay": 28494, - "##otte": 28495, - "eater": 28496, - "handgun": 28497, - "ubiquitous": 28498, - "##hett": 28499, - "eras": 28500, - "zodiac": 28501, - "1634": 28502, - "##omorphic": 28503, - "1820s": 28504, - "##zog": 28505, - "cochran": 28506, - "##bula": 28507, - "##lithic": 28508, - "warring": 28509, - "##rada": 28510, - "dalai": 28511, - "excused": 28512, - "blazers": 28513, - "mcconnell": 28514, - "reeling": 28515, - "bot": 28516, - "este": 28517, - "##abi": 28518, - "geese": 28519, - "hoax": 28520, - "taxon": 28521, - "##bla": 28522, - "guitarists": 28523, - "##icon": 28524, - "condemning": 28525, - "hunts": 28526, - "inversion": 28527, - "moffat": 28528, - "taekwondo": 28529, - "##lvis": 28530, - "1624": 28531, - "stammered": 28532, - "##rest": 28533, - "##rzy": 28534, - "sousa": 28535, - "fundraiser": 28536, - "marylebone": 28537, - "navigable": 28538, - "uptown": 28539, - "cabbage": 28540, - "daniela": 28541, - "salman": 28542, - "shitty": 28543, - "whimper": 28544, - "##kian": 28545, - "##utive": 28546, - "programmers": 28547, - "protections": 28548, - "rm": 28549, - "##rmi": 28550, - "##rued": 28551, - "forceful": 28552, - "##enes": 28553, - "fuss": 28554, - "##tao": 28555, - "##wash": 28556, - "brat": 28557, 
- "oppressive": 28558, - "reykjavik": 28559, - "spartak": 28560, - "ticking": 28561, - "##inkles": 28562, - "##kiewicz": 28563, - "adolph": 28564, - "horst": 28565, - "maui": 28566, - "protege": 28567, - "straighten": 28568, - "cpc": 28569, - "landau": 28570, - "concourse": 28571, - "clements": 28572, - "resultant": 28573, - "##ando": 28574, - "imaginative": 28575, - "joo": 28576, - "reactivated": 28577, - "##rem": 28578, - "##ffled": 28579, - "##uising": 28580, - "consultative": 28581, - "##guide": 28582, - "flop": 28583, - "kaitlyn": 28584, - "mergers": 28585, - "parenting": 28586, - "somber": 28587, - "##vron": 28588, - "supervise": 28589, - "vidhan": 28590, - "##imum": 28591, - "courtship": 28592, - "exemplified": 28593, - "harmonies": 28594, - "medallist": 28595, - "refining": 28596, - "##rrow": 28597, - "##ка": 28598, - "amara": 28599, - "##hum": 28600, - "780": 28601, - "goalscorer": 28602, - "sited": 28603, - "overshadowed": 28604, - "rohan": 28605, - "displeasure": 28606, - "secretive": 28607, - "multiplied": 28608, - "osman": 28609, - "##orth": 28610, - "engravings": 28611, - "padre": 28612, - "##kali": 28613, - "##veda": 28614, - "miniatures": 28615, - "mis": 28616, - "##yala": 28617, - "clap": 28618, - "pali": 28619, - "rook": 28620, - "##cana": 28621, - "1692": 28622, - "57th": 28623, - "antennae": 28624, - "astro": 28625, - "oskar": 28626, - "1628": 28627, - "bulldog": 28628, - "crotch": 28629, - "hackett": 28630, - "yucatan": 28631, - "##sure": 28632, - "amplifiers": 28633, - "brno": 28634, - "ferrara": 28635, - "migrating": 28636, - "##gree": 28637, - "thanking": 28638, - "turing": 28639, - "##eza": 28640, - "mccann": 28641, - "ting": 28642, - "andersson": 28643, - "onslaught": 28644, - "gaines": 28645, - "ganga": 28646, - "incense": 28647, - "standardization": 28648, - "##mation": 28649, - "sentai": 28650, - "scuba": 28651, - "stuffing": 28652, - "turquoise": 28653, - "waivers": 28654, - "alloys": 28655, - "##vitt": 28656, - "regaining": 28657, - "vaults": 28658, - "##clops": 28659, - "##gizing": 28660, - "digger": 28661, - "furry": 28662, - "memorabilia": 28663, - "probing": 28664, - "##iad": 28665, - "payton": 28666, - "rec": 28667, - "deutschland": 28668, - "filippo": 28669, - "opaque": 28670, - "seamen": 28671, - "zenith": 28672, - "afrikaans": 28673, - "##filtration": 28674, - "disciplined": 28675, - "inspirational": 28676, - "##merie": 28677, - "banco": 28678, - "confuse": 28679, - "grafton": 28680, - "tod": 28681, - "##dgets": 28682, - "championed": 28683, - "simi": 28684, - "anomaly": 28685, - "biplane": 28686, - "##ceptive": 28687, - "electrode": 28688, - "##para": 28689, - "1697": 28690, - "cleavage": 28691, - "crossbow": 28692, - "swirl": 28693, - "informant": 28694, - "##lars": 28695, - "##osta": 28696, - "afi": 28697, - "bonfire": 28698, - "spec": 28699, - "##oux": 28700, - "lakeside": 28701, - "slump": 28702, - "##culus": 28703, - "##lais": 28704, - "##qvist": 28705, - "##rrigan": 28706, - "1016": 28707, - "facades": 28708, - "borg": 28709, - "inwardly": 28710, - "cervical": 28711, - "xl": 28712, - "pointedly": 28713, - "050": 28714, - "stabilization": 28715, - "##odon": 28716, - "chests": 28717, - "1699": 28718, - "hacked": 28719, - "ctv": 28720, - "orthogonal": 28721, - "suzy": 28722, - "##lastic": 28723, - "gaulle": 28724, - "jacobite": 28725, - "rearview": 28726, - "##cam": 28727, - "##erted": 28728, - "ashby": 28729, - "##drik": 28730, - "##igate": 28731, - "##mise": 28732, - "##zbek": 28733, - "affectionately": 28734, - "canine": 28735, - "disperse": 
28736, - "latham": 28737, - "##istles": 28738, - "##ivar": 28739, - "spielberg": 28740, - "##orin": 28741, - "##idium": 28742, - "ezekiel": 28743, - "cid": 28744, - "##sg": 28745, - "durga": 28746, - "middletown": 28747, - "##cina": 28748, - "customized": 28749, - "frontiers": 28750, - "harden": 28751, - "##etano": 28752, - "##zzy": 28753, - "1604": 28754, - "bolsheviks": 28755, - "##66": 28756, - "coloration": 28757, - "yoko": 28758, - "##bedo": 28759, - "briefs": 28760, - "slabs": 28761, - "debra": 28762, - "liquidation": 28763, - "plumage": 28764, - "##oin": 28765, - "blossoms": 28766, - "dementia": 28767, - "subsidy": 28768, - "1611": 28769, - "proctor": 28770, - "relational": 28771, - "jerseys": 28772, - "parochial": 28773, - "ter": 28774, - "##ici": 28775, - "esa": 28776, - "peshawar": 28777, - "cavalier": 28778, - "loren": 28779, - "cpi": 28780, - "idiots": 28781, - "shamrock": 28782, - "1646": 28783, - "dutton": 28784, - "malabar": 28785, - "mustache": 28786, - "##endez": 28787, - "##ocytes": 28788, - "referencing": 28789, - "terminates": 28790, - "marche": 28791, - "yarmouth": 28792, - "##sop": 28793, - "acton": 28794, - "mated": 28795, - "seton": 28796, - "subtly": 28797, - "baptised": 28798, - "beige": 28799, - "extremes": 28800, - "jolted": 28801, - "kristina": 28802, - "telecast": 28803, - "##actic": 28804, - "safeguard": 28805, - "waldo": 28806, - "##baldi": 28807, - "##bular": 28808, - "endeavors": 28809, - "sloppy": 28810, - "subterranean": 28811, - "##ensburg": 28812, - "##itung": 28813, - "delicately": 28814, - "pigment": 28815, - "tq": 28816, - "##scu": 28817, - "1626": 28818, - "##ound": 28819, - "collisions": 28820, - "coveted": 28821, - "herds": 28822, - "##personal": 28823, - "##meister": 28824, - "##nberger": 28825, - "chopra": 28826, - "##ricting": 28827, - "abnormalities": 28828, - "defective": 28829, - "galician": 28830, - "lucie": 28831, - "##dilly": 28832, - "alligator": 28833, - "likened": 28834, - "##genase": 28835, - "burundi": 28836, - "clears": 28837, - "complexion": 28838, - "derelict": 28839, - "deafening": 28840, - "diablo": 28841, - "fingered": 28842, - "champaign": 28843, - "dogg": 28844, - "enlist": 28845, - "isotope": 28846, - "labeling": 28847, - "mrna": 28848, - "##erre": 28849, - "brilliance": 28850, - "marvelous": 28851, - "##ayo": 28852, - "1652": 28853, - "crawley": 28854, - "ether": 28855, - "footed": 28856, - "dwellers": 28857, - "deserts": 28858, - "hamish": 28859, - "rubs": 28860, - "warlock": 28861, - "skimmed": 28862, - "##lizer": 28863, - "870": 28864, - "buick": 28865, - "embark": 28866, - "heraldic": 28867, - "irregularities": 28868, - "##ajan": 28869, - "kiara": 28870, - "##kulam": 28871, - "##ieg": 28872, - "antigen": 28873, - "kowalski": 28874, - "##lge": 28875, - "oakley": 28876, - "visitation": 28877, - "##mbit": 28878, - "vt": 28879, - "##suit": 28880, - "1570": 28881, - "murderers": 28882, - "##miento": 28883, - "##rites": 28884, - "chimneys": 28885, - "##sling": 28886, - "condemn": 28887, - "custer": 28888, - "exchequer": 28889, - "havre": 28890, - "##ghi": 28891, - "fluctuations": 28892, - "##rations": 28893, - "dfb": 28894, - "hendricks": 28895, - "vaccines": 28896, - "##tarian": 28897, - "nietzsche": 28898, - "biking": 28899, - "juicy": 28900, - "##duced": 28901, - "brooding": 28902, - "scrolling": 28903, - "selangor": 28904, - "##ragan": 28905, - "352": 28906, - "annum": 28907, - "boomed": 28908, - "seminole": 28909, - "sugarcane": 28910, - "##dna": 28911, - "departmental": 28912, - "dismissing": 28913, - "innsbruck": 
28914, - "arteries": 28915, - "ashok": 28916, - "batavia": 28917, - "daze": 28918, - "kun": 28919, - "overtook": 28920, - "##rga": 28921, - "##tlan": 28922, - "beheaded": 28923, - "gaddafi": 28924, - "holm": 28925, - "electronically": 28926, - "faulty": 28927, - "galilee": 28928, - "fractures": 28929, - "kobayashi": 28930, - "##lized": 28931, - "gunmen": 28932, - "magma": 28933, - "aramaic": 28934, - "mala": 28935, - "eastenders": 28936, - "inference": 28937, - "messengers": 28938, - "bf": 28939, - "##qu": 28940, - "407": 28941, - "bathrooms": 28942, - "##vere": 28943, - "1658": 28944, - "flashbacks": 28945, - "ideally": 28946, - "misunderstood": 28947, - "##jali": 28948, - "##weather": 28949, - "mendez": 28950, - "##grounds": 28951, - "505": 28952, - "uncanny": 28953, - "##iii": 28954, - "1709": 28955, - "friendships": 28956, - "##nbc": 28957, - "sacrament": 28958, - "accommodated": 28959, - "reiterated": 28960, - "logistical": 28961, - "pebbles": 28962, - "thumped": 28963, - "##escence": 28964, - "administering": 28965, - "decrees": 28966, - "drafts": 28967, - "##flight": 28968, - "##cased": 28969, - "##tula": 28970, - "futuristic": 28971, - "picket": 28972, - "intimidation": 28973, - "winthrop": 28974, - "##fahan": 28975, - "interfered": 28976, - "339": 28977, - "afar": 28978, - "francoise": 28979, - "morally": 28980, - "uta": 28981, - "cochin": 28982, - "croft": 28983, - "dwarfs": 28984, - "##bruck": 28985, - "##dents": 28986, - "##nami": 28987, - "biker": 28988, - "##hner": 28989, - "##meral": 28990, - "nano": 28991, - "##isen": 28992, - "##ometric": 28993, - "##pres": 28994, - "##ан": 28995, - "brightened": 28996, - "meek": 28997, - "parcels": 28998, - "securely": 28999, - "gunners": 29000, - "##jhl": 29001, - "##zko": 29002, - "agile": 29003, - "hysteria": 29004, - "##lten": 29005, - "##rcus": 29006, - "bukit": 29007, - "champs": 29008, - "chevy": 29009, - "cuckoo": 29010, - "leith": 29011, - "sadler": 29012, - "theologians": 29013, - "welded": 29014, - "##section": 29015, - "1663": 29016, - "jj": 29017, - "plurality": 29018, - "xander": 29019, - "##rooms": 29020, - "##formed": 29021, - "shredded": 29022, - "temps": 29023, - "intimately": 29024, - "pau": 29025, - "tormented": 29026, - "##lok": 29027, - "##stellar": 29028, - "1618": 29029, - "charred": 29030, - "ems": 29031, - "essen": 29032, - "##mmel": 29033, - "alarms": 29034, - "spraying": 29035, - "ascot": 29036, - "blooms": 29037, - "twinkle": 29038, - "##abia": 29039, - "##apes": 29040, - "internment": 29041, - "obsidian": 29042, - "##chaft": 29043, - "snoop": 29044, - "##dav": 29045, - "##ooping": 29046, - "malibu": 29047, - "##tension": 29048, - "quiver": 29049, - "##itia": 29050, - "hays": 29051, - "mcintosh": 29052, - "travers": 29053, - "walsall": 29054, - "##ffie": 29055, - "1623": 29056, - "beverley": 29057, - "schwarz": 29058, - "plunging": 29059, - "structurally": 29060, - "m3": 29061, - "rosenthal": 29062, - "vikram": 29063, - "##tsk": 29064, - "770": 29065, - "ghz": 29066, - "##onda": 29067, - "##tiv": 29068, - "chalmers": 29069, - "groningen": 29070, - "pew": 29071, - "reckon": 29072, - "unicef": 29073, - "##rvis": 29074, - "55th": 29075, - "##gni": 29076, - "1651": 29077, - "sulawesi": 29078, - "avila": 29079, - "cai": 29080, - "metaphysical": 29081, - "screwing": 29082, - "turbulence": 29083, - "##mberg": 29084, - "augusto": 29085, - "samba": 29086, - "56th": 29087, - "baffled": 29088, - "momentary": 29089, - "toxin": 29090, - "##urian": 29091, - "##wani": 29092, - "aachen": 29093, - "condoms": 29094, - "dali": 
29095, - "steppe": 29096, - "##3d": 29097, - "##app": 29098, - "##oed": 29099, - "##year": 29100, - "adolescence": 29101, - "dauphin": 29102, - "electrically": 29103, - "inaccessible": 29104, - "microscopy": 29105, - "nikita": 29106, - "##ega": 29107, - "atv": 29108, - "##cel": 29109, - "##enter": 29110, - "##oles": 29111, - "##oteric": 29112, - "##ы": 29113, - "accountants": 29114, - "punishments": 29115, - "wrongly": 29116, - "bribes": 29117, - "adventurous": 29118, - "clinch": 29119, - "flinders": 29120, - "southland": 29121, - "##hem": 29122, - "##kata": 29123, - "gough": 29124, - "##ciency": 29125, - "lads": 29126, - "soared": 29127, - "##ה": 29128, - "undergoes": 29129, - "deformation": 29130, - "outlawed": 29131, - "rubbish": 29132, - "##arus": 29133, - "##mussen": 29134, - "##nidae": 29135, - "##rzburg": 29136, - "arcs": 29137, - "##ingdon": 29138, - "##tituted": 29139, - "1695": 29140, - "wheelbase": 29141, - "wheeling": 29142, - "bombardier": 29143, - "campground": 29144, - "zebra": 29145, - "##lices": 29146, - "##oj": 29147, - "##bain": 29148, - "lullaby": 29149, - "##ecure": 29150, - "donetsk": 29151, - "wylie": 29152, - "grenada": 29153, - "##arding": 29154, - "##ης": 29155, - "squinting": 29156, - "eireann": 29157, - "opposes": 29158, - "##andra": 29159, - "maximal": 29160, - "runes": 29161, - "##broken": 29162, - "##cuting": 29163, - "##iface": 29164, - "##ror": 29165, - "##rosis": 29166, - "additive": 29167, - "britney": 29168, - "adultery": 29169, - "triggering": 29170, - "##drome": 29171, - "detrimental": 29172, - "aarhus": 29173, - "containment": 29174, - "jc": 29175, - "swapped": 29176, - "vichy": 29177, - "##ioms": 29178, - "madly": 29179, - "##oric": 29180, - "##rag": 29181, - "brant": 29182, - "##ckey": 29183, - "##trix": 29184, - "1560": 29185, - "1612": 29186, - "broughton": 29187, - "rustling": 29188, - "##stems": 29189, - "##uder": 29190, - "asbestos": 29191, - "mentoring": 29192, - "##nivorous": 29193, - "finley": 29194, - "leaps": 29195, - "##isan": 29196, - "apical": 29197, - "pry": 29198, - "slits": 29199, - "substitutes": 29200, - "##dict": 29201, - "intuitive": 29202, - "fantasia": 29203, - "insistent": 29204, - "unreasonable": 29205, - "##igen": 29206, - "##vna": 29207, - "domed": 29208, - "hannover": 29209, - "margot": 29210, - "ponder": 29211, - "##zziness": 29212, - "impromptu": 29213, - "jian": 29214, - "lc": 29215, - "rampage": 29216, - "stemming": 29217, - "##eft": 29218, - "andrey": 29219, - "gerais": 29220, - "whichever": 29221, - "amnesia": 29222, - "appropriated": 29223, - "anzac": 29224, - "clicks": 29225, - "modifying": 29226, - "ultimatum": 29227, - "cambrian": 29228, - "maids": 29229, - "verve": 29230, - "yellowstone": 29231, - "##mbs": 29232, - "conservatoire": 29233, - "##scribe": 29234, - "adherence": 29235, - "dinners": 29236, - "spectra": 29237, - "imperfect": 29238, - "mysteriously": 29239, - "sidekick": 29240, - "tatar": 29241, - "tuba": 29242, - "##aks": 29243, - "##ifolia": 29244, - "distrust": 29245, - "##athan": 29246, - "##zle": 29247, - "c2": 29248, - "ronin": 29249, - "zac": 29250, - "##pse": 29251, - "celaena": 29252, - "instrumentalist": 29253, - "scents": 29254, - "skopje": 29255, - "##mbling": 29256, - "comical": 29257, - "compensated": 29258, - "vidal": 29259, - "condor": 29260, - "intersect": 29261, - "jingle": 29262, - "wavelengths": 29263, - "##urrent": 29264, - "mcqueen": 29265, - "##izzly": 29266, - "carp": 29267, - "weasel": 29268, - "422": 29269, - "kanye": 29270, - "militias": 29271, - "postdoctoral": 29272, - 
"eugen": 29273, - "gunslinger": 29274, - "##ɛ": 29275, - "faux": 29276, - "hospice": 29277, - "##for": 29278, - "appalled": 29279, - "derivation": 29280, - "dwarves": 29281, - "##elis": 29282, - "dilapidated": 29283, - "##folk": 29284, - "astoria": 29285, - "philology": 29286, - "##lwyn": 29287, - "##otho": 29288, - "##saka": 29289, - "inducing": 29290, - "philanthropy": 29291, - "##bf": 29292, - "##itative": 29293, - "geek": 29294, - "markedly": 29295, - "sql": 29296, - "##yce": 29297, - "bessie": 29298, - "indices": 29299, - "rn": 29300, - "##flict": 29301, - "495": 29302, - "frowns": 29303, - "resolving": 29304, - "weightlifting": 29305, - "tugs": 29306, - "cleric": 29307, - "contentious": 29308, - "1653": 29309, - "mania": 29310, - "rms": 29311, - "##miya": 29312, - "##reate": 29313, - "##ruck": 29314, - "##tucket": 29315, - "bien": 29316, - "eels": 29317, - "marek": 29318, - "##ayton": 29319, - "##cence": 29320, - "discreet": 29321, - "unofficially": 29322, - "##ife": 29323, - "leaks": 29324, - "##bber": 29325, - "1705": 29326, - "332": 29327, - "dung": 29328, - "compressor": 29329, - "hillsborough": 29330, - "pandit": 29331, - "shillings": 29332, - "distal": 29333, - "##skin": 29334, - "381": 29335, - "##tat": 29336, - "##you": 29337, - "nosed": 29338, - "##nir": 29339, - "mangrove": 29340, - "undeveloped": 29341, - "##idia": 29342, - "textures": 29343, - "##inho": 29344, - "##500": 29345, - "##rise": 29346, - "ae": 29347, - "irritating": 29348, - "nay": 29349, - "amazingly": 29350, - "bancroft": 29351, - "apologetic": 29352, - "compassionate": 29353, - "kata": 29354, - "symphonies": 29355, - "##lovic": 29356, - "airspace": 29357, - "##lch": 29358, - "930": 29359, - "gifford": 29360, - "precautions": 29361, - "fulfillment": 29362, - "sevilla": 29363, - "vulgar": 29364, - "martinique": 29365, - "##urities": 29366, - "looting": 29367, - "piccolo": 29368, - "tidy": 29369, - "##dermott": 29370, - "quadrant": 29371, - "armchair": 29372, - "incomes": 29373, - "mathematicians": 29374, - "stampede": 29375, - "nilsson": 29376, - "##inking": 29377, - "##scan": 29378, - "foo": 29379, - "quarterfinal": 29380, - "##ostal": 29381, - "shang": 29382, - "shouldered": 29383, - "squirrels": 29384, - "##owe": 29385, - "344": 29386, - "vinegar": 29387, - "##bner": 29388, - "##rchy": 29389, - "##systems": 29390, - "delaying": 29391, - "##trics": 29392, - "ars": 29393, - "dwyer": 29394, - "rhapsody": 29395, - "sponsoring": 29396, - "##gration": 29397, - "bipolar": 29398, - "cinder": 29399, - "starters": 29400, - "##olio": 29401, - "##urst": 29402, - "421": 29403, - "signage": 29404, - "##nty": 29405, - "aground": 29406, - "figurative": 29407, - "mons": 29408, - "acquaintances": 29409, - "duets": 29410, - "erroneously": 29411, - "soyuz": 29412, - "elliptic": 29413, - "recreated": 29414, - "##cultural": 29415, - "##quette": 29416, - "##ssed": 29417, - "##tma": 29418, - "##zcz": 29419, - "moderator": 29420, - "scares": 29421, - "##itaire": 29422, - "##stones": 29423, - "##udence": 29424, - "juniper": 29425, - "sighting": 29426, - "##just": 29427, - "##nsen": 29428, - "britten": 29429, - "calabria": 29430, - "ry": 29431, - "bop": 29432, - "cramer": 29433, - "forsyth": 29434, - "stillness": 29435, - "##л": 29436, - "airmen": 29437, - "gathers": 29438, - "unfit": 29439, - "##umber": 29440, - "##upt": 29441, - "taunting": 29442, - "##rip": 29443, - "seeker": 29444, - "streamlined": 29445, - "##bution": 29446, - "holster": 29447, - "schumann": 29448, - "tread": 29449, - "vox": 29450, - "##gano": 29451, - "##onzo": 
29452, - "strive": 29453, - "dil": 29454, - "reforming": 29455, - "covent": 29456, - "newbury": 29457, - "predicting": 29458, - "##orro": 29459, - "decorate": 29460, - "tre": 29461, - "##puted": 29462, - "andover": 29463, - "ie": 29464, - "asahi": 29465, - "dept": 29466, - "dunkirk": 29467, - "gills": 29468, - "##tori": 29469, - "buren": 29470, - "huskies": 29471, - "##stis": 29472, - "##stov": 29473, - "abstracts": 29474, - "bets": 29475, - "loosen": 29476, - "##opa": 29477, - "1682": 29478, - "yearning": 29479, - "##glio": 29480, - "##sir": 29481, - "berman": 29482, - "effortlessly": 29483, - "enamel": 29484, - "napoli": 29485, - "persist": 29486, - "##peration": 29487, - "##uez": 29488, - "attache": 29489, - "elisa": 29490, - "b1": 29491, - "invitations": 29492, - "##kic": 29493, - "accelerating": 29494, - "reindeer": 29495, - "boardwalk": 29496, - "clutches": 29497, - "nelly": 29498, - "polka": 29499, - "starbucks": 29500, - "##kei": 29501, - "adamant": 29502, - "huey": 29503, - "lough": 29504, - "unbroken": 29505, - "adventurer": 29506, - "embroidery": 29507, - "inspecting": 29508, - "stanza": 29509, - "##ducted": 29510, - "naia": 29511, - "taluka": 29512, - "##pone": 29513, - "##roids": 29514, - "chases": 29515, - "deprivation": 29516, - "florian": 29517, - "##jing": 29518, - "##ppet": 29519, - "earthly": 29520, - "##lib": 29521, - "##ssee": 29522, - "colossal": 29523, - "foreigner": 29524, - "vet": 29525, - "freaks": 29526, - "patrice": 29527, - "rosewood": 29528, - "triassic": 29529, - "upstate": 29530, - "##pkins": 29531, - "dominates": 29532, - "ata": 29533, - "chants": 29534, - "ks": 29535, - "vo": 29536, - "##400": 29537, - "##bley": 29538, - "##raya": 29539, - "##rmed": 29540, - "555": 29541, - "agra": 29542, - "infiltrate": 29543, - "##ailing": 29544, - "##ilation": 29545, - "##tzer": 29546, - "##uppe": 29547, - "##werk": 29548, - "binoculars": 29549, - "enthusiast": 29550, - "fujian": 29551, - "squeak": 29552, - "##avs": 29553, - "abolitionist": 29554, - "almeida": 29555, - "boredom": 29556, - "hampstead": 29557, - "marsden": 29558, - "rations": 29559, - "##ands": 29560, - "inflated": 29561, - "334": 29562, - "bonuses": 29563, - "rosalie": 29564, - "patna": 29565, - "##rco": 29566, - "329": 29567, - "detachments": 29568, - "penitentiary": 29569, - "54th": 29570, - "flourishing": 29571, - "woolf": 29572, - "##dion": 29573, - "##etched": 29574, - "papyrus": 29575, - "##lster": 29576, - "##nsor": 29577, - "##toy": 29578, - "bobbed": 29579, - "dismounted": 29580, - "endelle": 29581, - "inhuman": 29582, - "motorola": 29583, - "tbs": 29584, - "wince": 29585, - "wreath": 29586, - "##ticus": 29587, - "hideout": 29588, - "inspections": 29589, - "sanjay": 29590, - "disgrace": 29591, - "infused": 29592, - "pudding": 29593, - "stalks": 29594, - "##urbed": 29595, - "arsenic": 29596, - "leases": 29597, - "##hyl": 29598, - "##rrard": 29599, - "collarbone": 29600, - "##waite": 29601, - "##wil": 29602, - "dowry": 29603, - "##bant": 29604, - "##edance": 29605, - "genealogical": 29606, - "nitrate": 29607, - "salamanca": 29608, - "scandals": 29609, - "thyroid": 29610, - "necessitated": 29611, - "##!": 29612, - "##\"": 29613, - "###": 29614, - "##$": 29615, - "##%": 29616, - "##&": 29617, - "##'": 29618, - "##(": 29619, - "##)": 29620, - "##*": 29621, - "##+": 29622, - "##,": 29623, - "##-": 29624, - "##.": 29625, - "##/": 29626, - "##:": 29627, - "##;": 29628, - "##<": 29629, - "##=": 29630, - "##>": 29631, - "##?": 29632, - "##@": 29633, - "##[": 29634, - "##\\": 29635, - "##]": 29636, - 
"##^": 29637, - "##_": 29638, - "##`": 29639, - "##{": 29640, - "##|": 29641, - "##}": 29642, - "##~": 29643, - "##¡": 29644, - "##¢": 29645, - "##£": 29646, - "##¤": 29647, - "##¥": 29648, - "##¦": 29649, - "##§": 29650, - "##¨": 29651, - "##©": 29652, - "##ª": 29653, - "##«": 29654, - "##¬": 29655, - "##®": 29656, - "##±": 29657, - "##´": 29658, - "##µ": 29659, - "##¶": 29660, - "##·": 29661, - "##º": 29662, - "##»": 29663, - "##¼": 29664, - "##¾": 29665, - "##¿": 29666, - "##æ": 29667, - "##ð": 29668, - "##÷": 29669, - "##þ": 29670, - "##đ": 29671, - "##ħ": 29672, - "##ŋ": 29673, - "##œ": 29674, - "##ƒ": 29675, - "##ɐ": 29676, - "##ɑ": 29677, - "##ɒ": 29678, - "##ɔ": 29679, - "##ɕ": 29680, - "##ə": 29681, - "##ɡ": 29682, - "##ɣ": 29683, - "##ɨ": 29684, - "##ɪ": 29685, - "##ɫ": 29686, - "##ɬ": 29687, - "##ɯ": 29688, - "##ɲ": 29689, - "##ɴ": 29690, - "##ɹ": 29691, - "##ɾ": 29692, - "##ʀ": 29693, - "##ʁ": 29694, - "##ʂ": 29695, - "##ʃ": 29696, - "##ʉ": 29697, - "##ʊ": 29698, - "##ʋ": 29699, - "##ʌ": 29700, - "##ʎ": 29701, - "##ʐ": 29702, - "##ʑ": 29703, - "##ʒ": 29704, - "##ʔ": 29705, - "##ʰ": 29706, - "##ʲ": 29707, - "##ʳ": 29708, - "##ʷ": 29709, - "##ʸ": 29710, - "##ʻ": 29711, - "##ʼ": 29712, - "##ʾ": 29713, - "##ʿ": 29714, - "##ˈ": 29715, - "##ˡ": 29716, - "##ˢ": 29717, - "##ˣ": 29718, - "##ˤ": 29719, - "##β": 29720, - "##γ": 29721, - "##δ": 29722, - "##ε": 29723, - "##ζ": 29724, - "##θ": 29725, - "##κ": 29726, - "##λ": 29727, - "##μ": 29728, - "##ξ": 29729, - "##ο": 29730, - "##π": 29731, - "##ρ": 29732, - "##σ": 29733, - "##τ": 29734, - "##υ": 29735, - "##φ": 29736, - "##χ": 29737, - "##ψ": 29738, - "##ω": 29739, - "##б": 29740, - "##г": 29741, - "##д": 29742, - "##ж": 29743, - "##з": 29744, - "##м": 29745, - "##п": 29746, - "##с": 29747, - "##у": 29748, - "##ф": 29749, - "##х": 29750, - "##ц": 29751, - "##ч": 29752, - "##ш": 29753, - "##щ": 29754, - "##ъ": 29755, - "##э": 29756, - "##ю": 29757, - "##ђ": 29758, - "##є": 29759, - "##і": 29760, - "##ј": 29761, - "##љ": 29762, - "##њ": 29763, - "##ћ": 29764, - "##ӏ": 29765, - "##ա": 29766, - "##բ": 29767, - "##գ": 29768, - "##դ": 29769, - "##ե": 29770, - "##թ": 29771, - "##ի": 29772, - "##լ": 29773, - "##կ": 29774, - "##հ": 29775, - "##մ": 29776, - "##յ": 29777, - "##ն": 29778, - "##ո": 29779, - "##պ": 29780, - "##ս": 29781, - "##վ": 29782, - "##տ": 29783, - "##ր": 29784, - "##ւ": 29785, - "##ք": 29786, - "##־": 29787, - "##א": 29788, - "##ב": 29789, - "##ג": 29790, - "##ד": 29791, - "##ו": 29792, - "##ז": 29793, - "##ח": 29794, - "##ט": 29795, - "##י": 29796, - "##ך": 29797, - "##כ": 29798, - "##ל": 29799, - "##ם": 29800, - "##מ": 29801, - "##ן": 29802, - "##נ": 29803, - "##ס": 29804, - "##ע": 29805, - "##ף": 29806, - "##פ": 29807, - "##ץ": 29808, - "##צ": 29809, - "##ק": 29810, - "##ר": 29811, - "##ש": 29812, - "##ת": 29813, - "##،": 29814, - "##ء": 29815, - "##ب": 29816, - "##ت": 29817, - "##ث": 29818, - "##ج": 29819, - "##ح": 29820, - "##خ": 29821, - "##ذ": 29822, - "##ز": 29823, - "##س": 29824, - "##ش": 29825, - "##ص": 29826, - "##ض": 29827, - "##ط": 29828, - "##ظ": 29829, - "##ع": 29830, - "##غ": 29831, - "##ـ": 29832, - "##ف": 29833, - "##ق": 29834, - "##ك": 29835, - "##و": 29836, - "##ى": 29837, - "##ٹ": 29838, - "##پ": 29839, - "##چ": 29840, - "##ک": 29841, - "##گ": 29842, - "##ں": 29843, - "##ھ": 29844, - "##ہ": 29845, - "##ے": 29846, - "##अ": 29847, - "##आ": 29848, - "##उ": 29849, - "##ए": 29850, - "##क": 29851, - "##ख": 29852, - "##ग": 29853, - "##च": 29854, - "##ज": 29855, - "##ट": 29856, - "##ड": 29857, - "##ण": 29858, - 
"##त": 29859, - "##थ": 29860, - "##द": 29861, - "##ध": 29862, - "##न": 29863, - "##प": 29864, - "##ब": 29865, - "##भ": 29866, - "##म": 29867, - "##य": 29868, - "##र": 29869, - "##ल": 29870, - "##व": 29871, - "##श": 29872, - "##ष": 29873, - "##स": 29874, - "##ह": 29875, - "##ा": 29876, - "##ि": 29877, - "##ी": 29878, - "##ो": 29879, - "##।": 29880, - "##॥": 29881, - "##ং": 29882, - "##অ": 29883, - "##আ": 29884, - "##ই": 29885, - "##উ": 29886, - "##এ": 29887, - "##ও": 29888, - "##ক": 29889, - "##খ": 29890, - "##গ": 29891, - "##চ": 29892, - "##ছ": 29893, - "##জ": 29894, - "##ট": 29895, - "##ড": 29896, - "##ণ": 29897, - "##ত": 29898, - "##থ": 29899, - "##দ": 29900, - "##ধ": 29901, - "##ন": 29902, - "##প": 29903, - "##ব": 29904, - "##ভ": 29905, - "##ম": 29906, - "##য": 29907, - "##র": 29908, - "##ল": 29909, - "##শ": 29910, - "##ষ": 29911, - "##স": 29912, - "##হ": 29913, - "##া": 29914, - "##ি": 29915, - "##ী": 29916, - "##ে": 29917, - "##க": 29918, - "##ச": 29919, - "##ட": 29920, - "##த": 29921, - "##ந": 29922, - "##ன": 29923, - "##ப": 29924, - "##ம": 29925, - "##ய": 29926, - "##ர": 29927, - "##ல": 29928, - "##ள": 29929, - "##வ": 29930, - "##ா": 29931, - "##ி": 29932, - "##ு": 29933, - "##ே": 29934, - "##ை": 29935, - "##ನ": 29936, - "##ರ": 29937, - "##ಾ": 29938, - "##ක": 29939, - "##ය": 29940, - "##ර": 29941, - "##ල": 29942, - "##ව": 29943, - "##ා": 29944, - "##ก": 29945, - "##ง": 29946, - "##ต": 29947, - "##ท": 29948, - "##น": 29949, - "##พ": 29950, - "##ม": 29951, - "##ย": 29952, - "##ร": 29953, - "##ล": 29954, - "##ว": 29955, - "##ส": 29956, - "##อ": 29957, - "##า": 29958, - "##เ": 29959, - "##་": 29960, - "##།": 29961, - "##ག": 29962, - "##ང": 29963, - "##ད": 29964, - "##ན": 29965, - "##པ": 29966, - "##བ": 29967, - "##མ": 29968, - "##འ": 29969, - "##ར": 29970, - "##ལ": 29971, - "##ས": 29972, - "##မ": 29973, - "##ა": 29974, - "##ბ": 29975, - "##გ": 29976, - "##დ": 29977, - "##ე": 29978, - "##ვ": 29979, - "##თ": 29980, - "##ი": 29981, - "##კ": 29982, - "##ლ": 29983, - "##მ": 29984, - "##ნ": 29985, - "##ო": 29986, - "##რ": 29987, - "##ს": 29988, - "##ტ": 29989, - "##უ": 29990, - "##ᄀ": 29991, - "##ᄂ": 29992, - "##ᄃ": 29993, - "##ᄅ": 29994, - "##ᄆ": 29995, - "##ᄇ": 29996, - "##ᄉ": 29997, - "##ᄊ": 29998, - "##ᄋ": 29999, - "##ᄌ": 30000, - "##ᄎ": 30001, - "##ᄏ": 30002, - "##ᄐ": 30003, - "##ᄑ": 30004, - "##ᄒ": 30005, - "##ᅡ": 30006, - "##ᅢ": 30007, - "##ᅥ": 30008, - "##ᅦ": 30009, - "##ᅧ": 30010, - "##ᅩ": 30011, - "##ᅪ": 30012, - "##ᅭ": 30013, - "##ᅮ": 30014, - "##ᅯ": 30015, - "##ᅲ": 30016, - "##ᅳ": 30017, - "##ᅴ": 30018, - "##ᅵ": 30019, - "##ᆨ": 30020, - "##ᆫ": 30021, - "##ᆯ": 30022, - "##ᆷ": 30023, - "##ᆸ": 30024, - "##ᆼ": 30025, - "##ᴬ": 30026, - "##ᴮ": 30027, - "##ᴰ": 30028, - "##ᴵ": 30029, - "##ᴺ": 30030, - "##ᵀ": 30031, - "##ᵃ": 30032, - "##ᵇ": 30033, - "##ᵈ": 30034, - "##ᵉ": 30035, - "##ᵍ": 30036, - "##ᵏ": 30037, - "##ᵐ": 30038, - "##ᵒ": 30039, - "##ᵖ": 30040, - "##ᵗ": 30041, - "##ᵘ": 30042, - "##ᵣ": 30043, - "##ᵤ": 30044, - "##ᵥ": 30045, - "##ᶜ": 30046, - "##ᶠ": 30047, - "##‐": 30048, - "##‑": 30049, - "##‒": 30050, - "##–": 30051, - "##—": 30052, - "##―": 30053, - "##‖": 30054, - "##‘": 30055, - "##’": 30056, - "##‚": 30057, - "##“": 30058, - "##”": 30059, - "##„": 30060, - "##†": 30061, - "##‡": 30062, - "##•": 30063, - "##…": 30064, - "##‰": 30065, - "##′": 30066, - "##″": 30067, - "##›": 30068, - "##‿": 30069, - "##⁄": 30070, - "##⁰": 30071, - "##ⁱ": 30072, - "##⁴": 30073, - "##⁵": 30074, - "##⁶": 30075, - "##⁷": 30076, - "##⁸": 30077, - "##⁹": 30078, - "##⁻": 30079, - "##ⁿ": 30080, - 
"##₅": 30081, - "##₆": 30082, - "##₇": 30083, - "##₈": 30084, - "##₉": 30085, - "##₊": 30086, - "##₍": 30087, - "##₎": 30088, - "##ₐ": 30089, - "##ₑ": 30090, - "##ₒ": 30091, - "##ₓ": 30092, - "##ₕ": 30093, - "##ₖ": 30094, - "##ₗ": 30095, - "##ₘ": 30096, - "##ₚ": 30097, - "##ₛ": 30098, - "##ₜ": 30099, - "##₤": 30100, - "##₩": 30101, - "##€": 30102, - "##₱": 30103, - "##₹": 30104, - "##ℓ": 30105, - "##№": 30106, - "##ℝ": 30107, - "##™": 30108, - "##⅓": 30109, - "##⅔": 30110, - "##←": 30111, - "##↑": 30112, - "##→": 30113, - "##↓": 30114, - "##↔": 30115, - "##↦": 30116, - "##⇄": 30117, - "##⇌": 30118, - "##⇒": 30119, - "##∂": 30120, - "##∅": 30121, - "##∆": 30122, - "##∇": 30123, - "##∈": 30124, - "##∗": 30125, - "##∘": 30126, - "##√": 30127, - "##∞": 30128, - "##∧": 30129, - "##∨": 30130, - "##∩": 30131, - "##∪": 30132, - "##≈": 30133, - "##≡": 30134, - "##≤": 30135, - "##≥": 30136, - "##⊂": 30137, - "##⊆": 30138, - "##⊕": 30139, - "##⊗": 30140, - "##⋅": 30141, - "##─": 30142, - "##│": 30143, - "##■": 30144, - "##▪": 30145, - "##●": 30146, - "##★": 30147, - "##☆": 30148, - "##☉": 30149, - "##♠": 30150, - "##♣": 30151, - "##♥": 30152, - "##♦": 30153, - "##♯": 30154, - "##⟨": 30155, - "##⟩": 30156, - "##ⱼ": 30157, - "##⺩": 30158, - "##⺼": 30159, - "##⽥": 30160, - "##、": 30161, - "##。": 30162, - "##〈": 30163, - "##〉": 30164, - "##《": 30165, - "##》": 30166, - "##「": 30167, - "##」": 30168, - "##『": 30169, - "##』": 30170, - "##〜": 30171, - "##あ": 30172, - "##い": 30173, - "##う": 30174, - "##え": 30175, - "##お": 30176, - "##か": 30177, - "##き": 30178, - "##く": 30179, - "##け": 30180, - "##こ": 30181, - "##さ": 30182, - "##し": 30183, - "##す": 30184, - "##せ": 30185, - "##そ": 30186, - "##た": 30187, - "##ち": 30188, - "##っ": 30189, - "##つ": 30190, - "##て": 30191, - "##と": 30192, - "##な": 30193, - "##に": 30194, - "##ぬ": 30195, - "##ね": 30196, - "##の": 30197, - "##は": 30198, - "##ひ": 30199, - "##ふ": 30200, - "##へ": 30201, - "##ほ": 30202, - "##ま": 30203, - "##み": 30204, - "##む": 30205, - "##め": 30206, - "##も": 30207, - "##や": 30208, - "##ゆ": 30209, - "##よ": 30210, - "##ら": 30211, - "##り": 30212, - "##る": 30213, - "##れ": 30214, - "##ろ": 30215, - "##を": 30216, - "##ん": 30217, - "##ァ": 30218, - "##ア": 30219, - "##ィ": 30220, - "##イ": 30221, - "##ウ": 30222, - "##ェ": 30223, - "##エ": 30224, - "##オ": 30225, - "##カ": 30226, - "##キ": 30227, - "##ク": 30228, - "##ケ": 30229, - "##コ": 30230, - "##サ": 30231, - "##シ": 30232, - "##ス": 30233, - "##セ": 30234, - "##タ": 30235, - "##チ": 30236, - "##ッ": 30237, - "##ツ": 30238, - "##テ": 30239, - "##ト": 30240, - "##ナ": 30241, - "##ニ": 30242, - "##ノ": 30243, - "##ハ": 30244, - "##ヒ": 30245, - "##フ": 30246, - "##ヘ": 30247, - "##ホ": 30248, - "##マ": 30249, - "##ミ": 30250, - "##ム": 30251, - "##メ": 30252, - "##モ": 30253, - "##ャ": 30254, - "##ュ": 30255, - "##ョ": 30256, - "##ラ": 30257, - "##リ": 30258, - "##ル": 30259, - "##レ": 30260, - "##ロ": 30261, - "##ワ": 30262, - "##ン": 30263, - "##・": 30264, - "##ー": 30265, - "##一": 30266, - "##三": 30267, - "##上": 30268, - "##下": 30269, - "##不": 30270, - "##世": 30271, - "##中": 30272, - "##主": 30273, - "##久": 30274, - "##之": 30275, - "##也": 30276, - "##事": 30277, - "##二": 30278, - "##五": 30279, - "##井": 30280, - "##京": 30281, - "##人": 30282, - "##亻": 30283, - "##仁": 30284, - "##介": 30285, - "##代": 30286, - "##仮": 30287, - "##伊": 30288, - "##会": 30289, - "##佐": 30290, - "##侍": 30291, - "##保": 30292, - "##信": 30293, - "##健": 30294, - "##元": 30295, - "##光": 30296, - "##八": 30297, - "##公": 30298, - "##内": 30299, - "##出": 30300, - "##分": 30301, - "##前": 30302, - 
"##劉": 30303, - "##力": 30304, - "##加": 30305, - "##勝": 30306, - "##北": 30307, - "##区": 30308, - "##十": 30309, - "##千": 30310, - "##南": 30311, - "##博": 30312, - "##原": 30313, - "##口": 30314, - "##古": 30315, - "##史": 30316, - "##司": 30317, - "##合": 30318, - "##吉": 30319, - "##同": 30320, - "##名": 30321, - "##和": 30322, - "##囗": 30323, - "##四": 30324, - "##国": 30325, - "##國": 30326, - "##土": 30327, - "##地": 30328, - "##坂": 30329, - "##城": 30330, - "##堂": 30331, - "##場": 30332, - "##士": 30333, - "##夏": 30334, - "##外": 30335, - "##大": 30336, - "##天": 30337, - "##太": 30338, - "##夫": 30339, - "##奈": 30340, - "##女": 30341, - "##子": 30342, - "##学": 30343, - "##宀": 30344, - "##宇": 30345, - "##安": 30346, - "##宗": 30347, - "##定": 30348, - "##宣": 30349, - "##宮": 30350, - "##家": 30351, - "##宿": 30352, - "##寺": 30353, - "##將": 30354, - "##小": 30355, - "##尚": 30356, - "##山": 30357, - "##岡": 30358, - "##島": 30359, - "##崎": 30360, - "##川": 30361, - "##州": 30362, - "##巿": 30363, - "##帝": 30364, - "##平": 30365, - "##年": 30366, - "##幸": 30367, - "##广": 30368, - "##弘": 30369, - "##張": 30370, - "##彳": 30371, - "##後": 30372, - "##御": 30373, - "##德": 30374, - "##心": 30375, - "##忄": 30376, - "##志": 30377, - "##忠": 30378, - "##愛": 30379, - "##成": 30380, - "##我": 30381, - "##戦": 30382, - "##戸": 30383, - "##手": 30384, - "##扌": 30385, - "##政": 30386, - "##文": 30387, - "##新": 30388, - "##方": 30389, - "##日": 30390, - "##明": 30391, - "##星": 30392, - "##春": 30393, - "##昭": 30394, - "##智": 30395, - "##曲": 30396, - "##書": 30397, - "##月": 30398, - "##有": 30399, - "##朝": 30400, - "##木": 30401, - "##本": 30402, - "##李": 30403, - "##村": 30404, - "##東": 30405, - "##松": 30406, - "##林": 30407, - "##森": 30408, - "##楊": 30409, - "##樹": 30410, - "##橋": 30411, - "##歌": 30412, - "##止": 30413, - "##正": 30414, - "##武": 30415, - "##比": 30416, - "##氏": 30417, - "##民": 30418, - "##水": 30419, - "##氵": 30420, - "##氷": 30421, - "##永": 30422, - "##江": 30423, - "##沢": 30424, - "##河": 30425, - "##治": 30426, - "##法": 30427, - "##海": 30428, - "##清": 30429, - "##漢": 30430, - "##瀬": 30431, - "##火": 30432, - "##版": 30433, - "##犬": 30434, - "##王": 30435, - "##生": 30436, - "##田": 30437, - "##男": 30438, - "##疒": 30439, - "##発": 30440, - "##白": 30441, - "##的": 30442, - "##皇": 30443, - "##目": 30444, - "##相": 30445, - "##省": 30446, - "##真": 30447, - "##石": 30448, - "##示": 30449, - "##社": 30450, - "##神": 30451, - "##福": 30452, - "##禾": 30453, - "##秀": 30454, - "##秋": 30455, - "##空": 30456, - "##立": 30457, - "##章": 30458, - "##竹": 30459, - "##糹": 30460, - "##美": 30461, - "##義": 30462, - "##耳": 30463, - "##良": 30464, - "##艹": 30465, - "##花": 30466, - "##英": 30467, - "##華": 30468, - "##葉": 30469, - "##藤": 30470, - "##行": 30471, - "##街": 30472, - "##西": 30473, - "##見": 30474, - "##訁": 30475, - "##語": 30476, - "##谷": 30477, - "##貝": 30478, - "##貴": 30479, - "##車": 30480, - "##軍": 30481, - "##辶": 30482, - "##道": 30483, - "##郎": 30484, - "##郡": 30485, - "##部": 30486, - "##都": 30487, - "##里": 30488, - "##野": 30489, - "##金": 30490, - "##鈴": 30491, - "##镇": 30492, - "##長": 30493, - "##門": 30494, - "##間": 30495, - "##阝": 30496, - "##阿": 30497, - "##陳": 30498, - "##陽": 30499, - "##雄": 30500, - "##青": 30501, - "##面": 30502, - "##風": 30503, - "##食": 30504, - "##香": 30505, - "##馬": 30506, - "##高": 30507, - "##龍": 30508, - "##龸": 30509, - "##fi": 30510, - "##fl": 30511, - "##!": 30512, - "##(": 30513, - "##)": 30514, - "##,": 30515, - "##-": 30516, - "##.": 30517, - "##/": 30518, - "##:": 30519, - "##?": 30520, - "##~": 30521 - } - } -} \ No newline at end of file diff --git 
a/crawl4ai/models/onnx/tokenizer_config.json b/crawl4ai/models/onnx/tokenizer_config.json deleted file mode 100644 index 0d9932d..0000000 --- a/crawl4ai/models/onnx/tokenizer_config.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "cls_token": "[CLS]", - "do_basic_tokenize": true, - "do_lower_case": true, - "mask_token": "[MASK]", - "model_max_length": 512, - "never_split": null, - "pad_token": "[PAD]", - "sep_token": "[SEP]", - "special_tokens_map_file": "/Users/hammad/.cache/huggingface/hub/models--sentence-transformers--all-MiniLM-L6-v2/snapshots/7dbbc90392e2f80f3d3c277d6e90027e55de9125/special_tokens_map.json", - "strip_accents": null, - "tokenize_chinese_chars": true, - "tokenizer_class": "BertTokenizer", - "unk_token": "[UNK]" -} diff --git a/crawl4ai/models/onnx/vocab.txt b/crawl4ai/models/onnx/vocab.txt deleted file mode 100644 index fb14027..0000000 --- a/crawl4ai/models/onnx/vocab.txt +++ /dev/null @@ -1,30522 +0,0 @@ -[PAD] -[unused0] -[unused1] -[unused2] -[unused3] -[unused4] -[unused5] -[unused6] -[unused7] -[unused8] -[unused9] -[unused10] -[unused11] -[unused12] -[unused13] -[unused14] -[unused15] -[unused16] -[unused17] -[unused18] -[unused19] -[unused20] -[unused21] -[unused22] -[unused23] -[unused24] -[unused25] -[unused26] -[unused27] -[unused28] -[unused29] -[unused30] -[unused31] -[unused32] -[unused33] -[unused34] -[unused35] -[unused36] -[unused37] -[unused38] -[unused39] -[unused40] -[unused41] -[unused42] -[unused43] -[unused44] -[unused45] -[unused46] -[unused47] -[unused48] -[unused49] -[unused50] -[unused51] -[unused52] -[unused53] -[unused54] -[unused55] -[unused56] -[unused57] -[unused58] -[unused59] -[unused60] -[unused61] -[unused62] -[unused63] -[unused64] -[unused65] -[unused66] -[unused67] -[unused68] -[unused69] -[unused70] -[unused71] -[unused72] -[unused73] -[unused74] -[unused75] -[unused76] -[unused77] -[unused78] -[unused79] -[unused80] -[unused81] -[unused82] -[unused83] -[unused84] -[unused85] -[unused86] -[unused87] -[unused88] -[unused89] -[unused90] -[unused91] -[unused92] -[unused93] -[unused94] -[unused95] -[unused96] -[unused97] -[unused98] -[UNK] -[CLS] -[SEP] -[MASK] -[unused99] -[unused100] -[unused101] -[unused102] -[unused103] -[unused104] -[unused105] -[unused106] -[unused107] -[unused108] -[unused109] -[unused110] -[unused111] -[unused112] -[unused113] -[unused114] -[unused115] -[unused116] -[unused117] -[unused118] -[unused119] -[unused120] -[unused121] -[unused122] -[unused123] -[unused124] -[unused125] -[unused126] -[unused127] -[unused128] -[unused129] -[unused130] -[unused131] -[unused132] -[unused133] -[unused134] -[unused135] -[unused136] -[unused137] -[unused138] -[unused139] -[unused140] -[unused141] -[unused142] -[unused143] -[unused144] -[unused145] -[unused146] -[unused147] -[unused148] -[unused149] -[unused150] -[unused151] -[unused152] -[unused153] -[unused154] -[unused155] -[unused156] -[unused157] -[unused158] -[unused159] -[unused160] -[unused161] -[unused162] -[unused163] -[unused164] -[unused165] -[unused166] -[unused167] -[unused168] -[unused169] -[unused170] -[unused171] -[unused172] -[unused173] -[unused174] -[unused175] -[unused176] -[unused177] -[unused178] -[unused179] -[unused180] -[unused181] -[unused182] -[unused183] -[unused184] -[unused185] -[unused186] -[unused187] -[unused188] -[unused189] -[unused190] -[unused191] -[unused192] -[unused193] -[unused194] -[unused195] -[unused196] -[unused197] -[unused198] -[unused199] -[unused200] -[unused201] -[unused202] -[unused203] -[unused204] -[unused205] -[unused206] 
-[unused207] -[unused208] -[unused209] -[unused210] -[unused211] -[unused212] -[unused213] -[unused214] -[unused215] -[unused216] -[unused217] -[unused218] -[unused219] -[unused220] -[unused221] -[unused222] -[unused223] -[unused224] -[unused225] -[unused226] -[unused227] -[unused228] -[unused229] -[unused230] -[unused231] -[unused232] -[unused233] -[unused234] -[unused235] -[unused236] -[unused237] -[unused238] -[unused239] -[unused240] -[unused241] -[unused242] -[unused243] -[unused244] -[unused245] -[unused246] -[unused247] -[unused248] -[unused249] -[unused250] -[unused251] -[unused252] -[unused253] -[unused254] -[unused255] -[unused256] -[unused257] -[unused258] -[unused259] -[unused260] -[unused261] -[unused262] -[unused263] -[unused264] -[unused265] -[unused266] -[unused267] -[unused268] -[unused269] -[unused270] -[unused271] -[unused272] -[unused273] -[unused274] -[unused275] -[unused276] -[unused277] -[unused278] -[unused279] -[unused280] -[unused281] -[unused282] -[unused283] -[unused284] -[unused285] -[unused286] -[unused287] -[unused288] -[unused289] -[unused290] -[unused291] -[unused292] -[unused293] -[unused294] -[unused295] -[unused296] -[unused297] -[unused298] -[unused299] -[unused300] -[unused301] -[unused302] -[unused303] -[unused304] -[unused305] -[unused306] -[unused307] -[unused308] -[unused309] -[unused310] -[unused311] -[unused312] -[unused313] -[unused314] -[unused315] -[unused316] -[unused317] -[unused318] -[unused319] -[unused320] -[unused321] -[unused322] -[unused323] -[unused324] -[unused325] -[unused326] -[unused327] -[unused328] -[unused329] -[unused330] -[unused331] -[unused332] -[unused333] -[unused334] -[unused335] -[unused336] -[unused337] -[unused338] -[unused339] -[unused340] -[unused341] -[unused342] -[unused343] -[unused344] -[unused345] -[unused346] -[unused347] -[unused348] -[unused349] -[unused350] -[unused351] -[unused352] -[unused353] -[unused354] -[unused355] -[unused356] -[unused357] -[unused358] -[unused359] -[unused360] -[unused361] -[unused362] -[unused363] -[unused364] -[unused365] -[unused366] -[unused367] -[unused368] -[unused369] -[unused370] -[unused371] -[unused372] -[unused373] -[unused374] -[unused375] -[unused376] -[unused377] -[unused378] -[unused379] -[unused380] -[unused381] -[unused382] -[unused383] -[unused384] -[unused385] -[unused386] -[unused387] -[unused388] -[unused389] -[unused390] -[unused391] -[unused392] -[unused393] -[unused394] -[unused395] -[unused396] -[unused397] -[unused398] -[unused399] -[unused400] -[unused401] -[unused402] -[unused403] -[unused404] -[unused405] -[unused406] -[unused407] -[unused408] -[unused409] -[unused410] -[unused411] -[unused412] -[unused413] -[unused414] -[unused415] -[unused416] -[unused417] -[unused418] -[unused419] -[unused420] -[unused421] -[unused422] -[unused423] -[unused424] -[unused425] -[unused426] -[unused427] -[unused428] -[unused429] -[unused430] -[unused431] -[unused432] -[unused433] -[unused434] -[unused435] -[unused436] -[unused437] -[unused438] -[unused439] -[unused440] -[unused441] -[unused442] -[unused443] -[unused444] -[unused445] -[unused446] -[unused447] -[unused448] -[unused449] -[unused450] -[unused451] -[unused452] -[unused453] -[unused454] -[unused455] -[unused456] -[unused457] -[unused458] -[unused459] -[unused460] -[unused461] -[unused462] -[unused463] -[unused464] -[unused465] -[unused466] -[unused467] -[unused468] -[unused469] -[unused470] -[unused471] -[unused472] -[unused473] -[unused474] -[unused475] -[unused476] -[unused477] -[unused478] -[unused479] 
-[unused480] -[unused481] -[unused482] -[unused483] -[unused484] -[unused485] -[unused486] -[unused487] -[unused488] -[unused489] -[unused490] -[unused491] -[unused492] -[unused493] -[unused494] -[unused495] -[unused496] -[unused497] -[unused498] -[unused499] -[unused500] -[unused501] -[unused502] -[unused503] -[unused504] -[unused505] -[unused506] -[unused507] -[unused508] -[unused509] -[unused510] -[unused511] -[unused512] -[unused513] -[unused514] -[unused515] -[unused516] -[unused517] -[unused518] -[unused519] -[unused520] -[unused521] -[unused522] -[unused523] -[unused524] -[unused525] -[unused526] -[unused527] -[unused528] -[unused529] -[unused530] -[unused531] -[unused532] -[unused533] -[unused534] -[unused535] -[unused536] -[unused537] -[unused538] -[unused539] -[unused540] -[unused541] -[unused542] -[unused543] -[unused544] -[unused545] -[unused546] -[unused547] -[unused548] -[unused549] -[unused550] -[unused551] -[unused552] -[unused553] -[unused554] -[unused555] -[unused556] -[unused557] -[unused558] -[unused559] -[unused560] -[unused561] -[unused562] -[unused563] -[unused564] -[unused565] -[unused566] -[unused567] -[unused568] -[unused569] -[unused570] -[unused571] -[unused572] -[unused573] -[unused574] -[unused575] -[unused576] -[unused577] -[unused578] -[unused579] -[unused580] -[unused581] -[unused582] -[unused583] -[unused584] -[unused585] -[unused586] -[unused587] -[unused588] -[unused589] -[unused590] -[unused591] -[unused592] -[unused593] -[unused594] -[unused595] -[unused596] -[unused597] -[unused598] -[unused599] -[unused600] -[unused601] -[unused602] -[unused603] -[unused604] -[unused605] -[unused606] -[unused607] -[unused608] -[unused609] -[unused610] -[unused611] -[unused612] -[unused613] -[unused614] -[unused615] -[unused616] -[unused617] -[unused618] -[unused619] -[unused620] -[unused621] -[unused622] -[unused623] -[unused624] -[unused625] -[unused626] -[unused627] -[unused628] -[unused629] -[unused630] -[unused631] -[unused632] -[unused633] -[unused634] -[unused635] -[unused636] -[unused637] -[unused638] -[unused639] -[unused640] -[unused641] -[unused642] -[unused643] -[unused644] -[unused645] -[unused646] -[unused647] -[unused648] -[unused649] -[unused650] -[unused651] -[unused652] -[unused653] -[unused654] -[unused655] -[unused656] -[unused657] -[unused658] -[unused659] -[unused660] -[unused661] -[unused662] -[unused663] -[unused664] -[unused665] -[unused666] -[unused667] -[unused668] -[unused669] -[unused670] -[unused671] -[unused672] -[unused673] -[unused674] -[unused675] -[unused676] -[unused677] -[unused678] -[unused679] -[unused680] -[unused681] -[unused682] -[unused683] -[unused684] -[unused685] -[unused686] -[unused687] -[unused688] -[unused689] -[unused690] -[unused691] -[unused692] -[unused693] -[unused694] -[unused695] -[unused696] -[unused697] -[unused698] -[unused699] -[unused700] -[unused701] -[unused702] -[unused703] -[unused704] -[unused705] -[unused706] -[unused707] -[unused708] -[unused709] -[unused710] -[unused711] -[unused712] -[unused713] -[unused714] -[unused715] -[unused716] -[unused717] -[unused718] -[unused719] -[unused720] -[unused721] -[unused722] -[unused723] -[unused724] -[unused725] -[unused726] -[unused727] -[unused728] -[unused729] -[unused730] -[unused731] -[unused732] -[unused733] -[unused734] -[unused735] -[unused736] -[unused737] -[unused738] -[unused739] -[unused740] -[unused741] -[unused742] -[unused743] -[unused744] -[unused745] -[unused746] -[unused747] -[unused748] -[unused749] -[unused750] -[unused751] -[unused752] 
-[unused753] -[unused754] -[unused755] -[unused756] -[unused757] -[unused758] -[unused759] -[unused760] -[unused761] -[unused762] -[unused763] -[unused764] -[unused765] -[unused766] -[unused767] -[unused768] -[unused769] -[unused770] -[unused771] -[unused772] -[unused773] -[unused774] -[unused775] -[unused776] -[unused777] -[unused778] -[unused779] -[unused780] -[unused781] -[unused782] -[unused783] -[unused784] -[unused785] -[unused786] -[unused787] -[unused788] -[unused789] -[unused790] -[unused791] -[unused792] -[unused793] -[unused794] -[unused795] -[unused796] -[unused797] -[unused798] -[unused799] -[unused800] -[unused801] -[unused802] -[unused803] -[unused804] -[unused805] -[unused806] -[unused807] -[unused808] -[unused809] -[unused810] -[unused811] -[unused812] -[unused813] -[unused814] -[unused815] -[unused816] -[unused817] -[unused818] -[unused819] -[unused820] -[unused821] -[unused822] -[unused823] -[unused824] -[unused825] -[unused826] -[unused827] -[unused828] -[unused829] -[unused830] -[unused831] -[unused832] -[unused833] -[unused834] -[unused835] -[unused836] -[unused837] -[unused838] -[unused839] -[unused840] -[unused841] -[unused842] -[unused843] -[unused844] -[unused845] -[unused846] -[unused847] -[unused848] -[unused849] -[unused850] -[unused851] -[unused852] -[unused853] -[unused854] -[unused855] -[unused856] -[unused857] -[unused858] -[unused859] -[unused860] -[unused861] -[unused862] -[unused863] -[unused864] -[unused865] -[unused866] -[unused867] -[unused868] -[unused869] -[unused870] -[unused871] -[unused872] -[unused873] -[unused874] -[unused875] -[unused876] -[unused877] -[unused878] -[unused879] -[unused880] -[unused881] -[unused882] -[unused883] -[unused884] -[unused885] -[unused886] -[unused887] -[unused888] -[unused889] -[unused890] -[unused891] -[unused892] -[unused893] -[unused894] -[unused895] -[unused896] -[unused897] -[unused898] -[unused899] -[unused900] -[unused901] -[unused902] -[unused903] -[unused904] -[unused905] -[unused906] -[unused907] -[unused908] -[unused909] -[unused910] -[unused911] -[unused912] -[unused913] -[unused914] -[unused915] -[unused916] -[unused917] -[unused918] -[unused919] -[unused920] -[unused921] -[unused922] -[unused923] -[unused924] -[unused925] -[unused926] -[unused927] -[unused928] -[unused929] -[unused930] -[unused931] -[unused932] -[unused933] -[unused934] -[unused935] -[unused936] -[unused937] -[unused938] -[unused939] -[unused940] -[unused941] -[unused942] -[unused943] -[unused944] -[unused945] -[unused946] -[unused947] -[unused948] -[unused949] -[unused950] -[unused951] -[unused952] -[unused953] -[unused954] -[unused955] -[unused956] -[unused957] -[unused958] -[unused959] -[unused960] -[unused961] -[unused962] -[unused963] -[unused964] -[unused965] -[unused966] -[unused967] -[unused968] -[unused969] -[unused970] -[unused971] -[unused972] -[unused973] -[unused974] -[unused975] -[unused976] -[unused977] -[unused978] -[unused979] -[unused980] -[unused981] -[unused982] -[unused983] -[unused984] -[unused985] -[unused986] -[unused987] -[unused988] -[unused989] -[unused990] -[unused991] -[unused992] -[unused993] -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-@ -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¡ -¢ -£ -¤ -¥ -¦ -§ -¨ -© -ª -« -¬ -® -° -± -² -³ -´ -µ -¶ -· -¹ -º -» -¼ -½ -¾ -¿ -× -ß -æ -ð -÷ -ø -þ -đ -ħ -ı -ł -ŋ -œ -ƒ -ɐ -ɑ -ɒ -ɔ -ɕ -ə -ɛ -ɡ -ɣ -ɨ -ɪ -ɫ -ɬ -ɯ -ɲ -ɴ -ɹ -ɾ -ʀ -ʁ -ʂ -ʃ -ʉ -ʊ -ʋ -ʌ -ʎ -ʐ -ʑ -ʒ -ʔ -ʰ -ʲ -ʳ -ʷ -ʸ -ʻ -ʼ -ʾ -ʿ -ˈ -ː -ˡ -ˢ -ˣ -ˤ -α -β -γ -δ -ε -ζ -η -θ -ι -κ -λ -μ -ν -ξ -ο -π -ρ -ς -σ -τ -υ -φ -χ -ψ -ω -а -б -в -г -д -е -ж -з -и -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ђ -є -і -ј -љ -њ -ћ -ӏ -ա -բ -գ -դ -ե -թ -ի -լ -կ -հ -մ -յ -ն -ո -պ -ս -վ -տ -ր -ւ -ք -־ -א -ב -ג -ד -ה -ו -ז -ח -ט -י -ך -כ -ל -ם -מ -ן -נ -ס -ע -ף -פ -ץ -צ -ק -ר -ש -ת -، -ء -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ـ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ٹ -پ -چ -ک -گ -ں -ھ -ہ -ی -ے -अ -आ -उ -ए -क -ख -ग -च -ज -ट -ड -ण -त -थ -द -ध -न -प -ब -भ -म -य -र -ल -व -श -ष -स -ह -ा -ि -ी -ो -। -॥ -ং -অ -আ -ই -উ -এ -ও -ক -খ -গ -চ -ছ -জ -ট -ড -ণ -ত -থ -দ -ধ -ন -প -ব -ভ -ম -য -র -ল -শ -ষ -স -হ -া -ি -ী -ে -க -ச -ட -த -ந -ன -ப -ம -ய -ர -ல -ள -வ -ா -ி -ு -ே -ை -ನ -ರ -ಾ -ක -ය -ර -ල -ව -ා -ก -ง -ต -ท -น -พ -ม -ย -ร -ล -ว -ส -อ -า -เ -་ -། -ག -ང -ད -ན -པ -བ -མ -འ -ར -ལ -ས -မ -ა -ბ -გ -დ -ე -ვ -თ -ი -კ -ლ -მ -ნ -ო -რ -ს -ტ -უ -ᄀ -ᄂ -ᄃ -ᄅ -ᄆ -ᄇ -ᄉ -ᄊ -ᄋ -ᄌ -ᄎ -ᄏ -ᄐ -ᄑ -ᄒ -ᅡ -ᅢ -ᅥ -ᅦ -ᅧ -ᅩ -ᅪ -ᅭ -ᅮ -ᅯ -ᅲ -ᅳ -ᅴ -ᅵ -ᆨ -ᆫ -ᆯ -ᆷ -ᆸ -ᆼ -ᴬ -ᴮ -ᴰ -ᴵ -ᴺ -ᵀ -ᵃ -ᵇ -ᵈ -ᵉ -ᵍ -ᵏ -ᵐ -ᵒ -ᵖ -ᵗ -ᵘ -ᵢ -ᵣ -ᵤ -ᵥ -ᶜ -ᶠ -‐ -‑ -‒ -– -— -― -‖ -‘ -’ -‚ -“ -” -„ -† -‡ -• -… -‰ -′ -″ -› -‿ -⁄ -⁰ -ⁱ -⁴ -⁵ -⁶ -⁷ -⁸ -⁹ -⁺ -⁻ -ⁿ -₀ -₁ -₂ -₃ -₄ -₅ -₆ -₇ -₈ -₉ -₊ -₍ -₎ -ₐ -ₑ -ₒ -ₓ -ₕ -ₖ -ₗ -ₘ -ₙ -ₚ -ₛ -ₜ -₤ -₩ -€ -₱ -₹ -ℓ -№ -ℝ -™ -⅓ -⅔ -← -↑ -→ -↓ -↔ -↦ -⇄ -⇌ -⇒ -∂ -∅ -∆ -∇ -∈ -− -∗ -∘ -√ -∞ -∧ -∨ -∩ -∪ -≈ -≡ -≤ -≥ -⊂ -⊆ -⊕ -⊗ -⋅ -─ -│ -■ -▪ -● -★ -☆ -☉ -♠ -♣ -♥ -♦ -♭ -♯ -⟨ -⟩ -ⱼ -⺩ -⺼ -⽥ -、 -。 -〈 -〉 -《 -》 -「 -」 -『 -』 -〜 -あ -い -う -え -お -か -き -く -け -こ -さ -し -す -せ -そ -た -ち -っ -つ -て -と -な -に -ぬ -ね -の -は -ひ -ふ -へ -ほ -ま -み -む -め -も -や -ゆ -よ -ら -り -る -れ -ろ -を -ん -ァ -ア -ィ -イ -ウ -ェ -エ -オ -カ -キ -ク -ケ -コ -サ -シ -ス -セ -タ -チ -ッ -ツ -テ -ト -ナ -ニ -ノ -ハ -ヒ -フ -ヘ -ホ -マ -ミ -ム -メ -モ -ャ -ュ -ョ -ラ -リ -ル -レ -ロ -ワ -ン -・ -ー -一 -三 -上 -下 -不 -世 -中 -主 -久 -之 -也 -事 -二 -五 -井 -京 -人 -亻 -仁 -介 -代 -仮 -伊 -会 -佐 -侍 -保 -信 -健 -元 -光 -八 -公 -内 -出 -分 -前 -劉 -力 -加 -勝 -北 -区 -十 -千 -南 -博 -原 -口 -古 -史 -司 -合 -吉 -同 -名 -和 -囗 -四 -国 -國 -土 -地 -坂 -城 -堂 -場 -士 -夏 -外 -大 -天 -太 -夫 -奈 -女 -子 -学 -宀 -宇 -安 -宗 -定 -宣 -宮 -家 -宿 -寺 -將 -小 -尚 -山 -岡 -島 -崎 -川 -州 -巿 -帝 -平 -年 -幸 -广 -弘 -張 -彳 -後 -御 -德 -心 -忄 -志 -忠 -愛 -成 -我 -戦 -戸 -手 -扌 -政 -文 -新 -方 -日 -明 -星 -春 -昭 -智 -曲 -書 -月 -有 -朝 -木 -本 -李 -村 -東 -松 -林 -森 -楊 -樹 -橋 -歌 -止 -正 -武 -比 -氏 -民 -水 -氵 -氷 -永 -江 -沢 -河 -治 -法 -海 -清 -漢 -瀬 -火 -版 -犬 -王 -生 -田 -男 -疒 -発 -白 -的 -皇 -目 -相 -省 -真 -石 -示 -社 -神 -福 -禾 -秀 -秋 -空 -立 -章 -竹 -糹 -美 -義 -耳 -良 -艹 -花 -英 -華 -葉 -藤 -行 -街 -西 -見 -訁 -語 -谷 -貝 -貴 -車 -軍 -辶 -道 -郎 -郡 -部 -都 -里 -野 -金 -鈴 -镇 -長 -門 -間 -阝 -阿 -陳 -陽 -雄 -青 -面 -風 -食 -香 -馬 -高 -龍 -龸 -fi -fl -! -( -) -, -- -. -/ -: -? 
-~ -the -of -and -in -to -was -he -is -as -for -on -with -that -it -his -by -at -from -her -##s -she -you -had -an -were -but -be -this -are -not -my -they -one -which -or -have -him -me -first -all -also -their -has -up -who -out -been -when -after -there -into -new -two -its -##a -time -would -no -what -about -said -we -over -then -other -so -more -##e -can -if -like -back -them -only -some -could -##i -where -just -##ing -during -before -##n -do -##o -made -school -through -than -now -years -most -world -may -between -down -well -three -##d -year -while -will -##ed -##r -##y -later -##t -city -under -around -did -such -being -used -state -people -part -know -against -your -many -second -university -both -national -##er -these -don -known -off -way -until -re -how -even -get -head -... -didn -##ly -team -american -because -de -##l -born -united -film -since -still -long -work -south -us -became -any -high -again -day -family -see -right -man -eyes -house -season -war -states -including -took -life -north -same -each -called -name -much -place -however -go -four -group -another -found -won -area -here -going -10 -away -series -left -home -music -best -make -hand -number -company -several -never -last -john -000 -very -album -take -end -good -too -following -released -game -played -little -began -district -##m -old -want -those -side -held -own -early -county -ll -league -use -west -##u -face -think -##es -2010 -government -##h -march -came -small -general -town -june -##on -line -based -something -##k -september -thought -looked -along -international -2011 -air -july -club -went -january -october -our -august -april -york -12 -few -2012 -2008 -east -show -member -college -2009 -father -public -##us -come -men -five -set -station -church -##c -next -former -november -room -party -located -december -2013 -age -got -2007 -##g -system -let -love -2006 -though -every -2014 -look -song -water -century -without -body -black -night -within -great -women -single -ve -building -large -population -river -named -band -white -started -##an -once -15 -20 -should -18 -2015 -service -top -built -british -open -death -king -moved -local -times -children -february -book -why -11 -door -need -president -order -final -road -wasn -although -due -major -died -village -third -knew -2016 -asked -turned -st -wanted -say -##p -together -received -main -son -served -different -##en -behind -himself -felt -members -power -football -law -voice -play -##in -near -park -history -30 -having -2005 -16 -##man -saw -mother -##al -army -point -front -help -english -street -art -late -hands -games -award -##ia -young -14 -put -published -country -division -across -told -13 -often -ever -french -london -center -six -red -2017 -led -days -include -light -25 -find -tell -among -species -really -according -central -half -2004 -form -original -gave -office -making -enough -lost -full -opened -must -included -live -given -german -player -run -business -woman -community -cup -might -million -land -2000 -court -development -17 -short -round -ii -km -seen -class -story -always -become -sure -research -almost -director -council -la -##2 -career -things -using -island -##z -couldn -car -##is -24 -close -force -##1 -better -free -support -control -field -students -2003 -education -married -##b -nothing -worked -others -record -big -inside -level -anything -continued -give -james -##3 -military -established -non -returned -feel -does -title -written -thing -feet -william -far -co -association -hard -already -2002 -##ra -championship 
-human -western -100 -##na -department -hall -role -various -production -21 -19 -heart -2001 -living -fire -version -##ers -##f -television -royal -##4 -produced -working -act -case -society -region -present -radio -period -looking -least -total -keep -england -wife -program -per -brother -mind -special -22 -##le -am -works -soon -##6 -political -george -services -taken -created -##7 -further -able -reached -david -union -joined -upon -done -important -social -information -either -##ic -##x -appeared -position -ground -lead -rock -dark -election -23 -board -france -hair -course -arms -site -police -girl -instead -real -sound -##v -words -moment -##te -someone -##8 -summer -project -announced -san -less -wrote -past -followed -##5 -blue -founded -al -finally -india -taking -records -america -##ne -1999 -design -considered -northern -god -stop -battle -toward -european -outside -described -track -today -playing -language -28 -call -26 -heard -professional -low -australia -miles -california -win -yet -green -##ie -trying -blood -##ton -southern -science -maybe -everything -match -square -27 -mouth -video -race -recorded -leave -above -##9 -daughter -points -space -1998 -museum -change -middle -common -##0 -move -tv -post -##ta -lake -seven -tried -elected -closed -ten -paul -minister -##th -months -start -chief -return -canada -person -sea -release -similar -modern -brought -rest -hit -formed -mr -##la -1997 -floor -event -doing -thomas -1996 -robert -care -killed -training -star -week -needed -turn -finished -railway -rather -news -health -sent -example -ran -term -michael -coming -currently -yes -forces -despite -gold -areas -50 -stage -fact -29 -dead -says -popular -2018 -originally -germany -probably -developed -result -pulled -friend -stood -money -running -mi -signed -word -songs -child -eventually -met -tour -average -teams -minutes -festival -current -deep -kind -1995 -decided -usually -eastern -seemed -##ness -episode -bed -added -table -indian -private -charles -route -available -idea -throughout -centre -addition -appointed -style -1994 -books -eight -construction -press -mean -wall -friends -remained -schools -study -##ch -##um -institute -oh -chinese -sometimes -events -possible -1992 -australian -type -brown -forward -talk -process -food -debut -seat -performance -committee -features -character -arts -herself -else -lot -strong -russian -range -hours -peter -arm -##da -morning -dr -sold -##ry -quickly -directed -1993 -guitar -china -##w -31 -list -##ma -performed -media -uk -players -smile -##rs -myself -40 -placed -coach -province -towards -wouldn -leading -whole -boy -official -designed -grand -census -##el -europe -attack -japanese -henry -1991 -##re -##os -cross -getting -alone -action -lower -network -wide -washington -japan -1990 -hospital -believe -changed -sister -##ar -hold -gone -sir -hadn -ship -##ka -studies -academy -shot -rights -below -base -bad -involved -kept -largest -##ist -bank -future -especially -beginning -mark -movement -section -female -magazine -plan -professor -lord -longer -##ian -sat -walked -hill -actually -civil -energy -model -families -size -thus -aircraft -completed -includes -data -captain -##or -fight -vocals -featured -richard -bridge -fourth -1989 -officer -stone -hear -##ism -means -medical -groups -management -self -lips -competition -entire -lived -technology -leaving -federal -tournament -bit -passed -hot -independent -awards -kingdom -mary -spent -fine -doesn -reported -##ling -jack -fall -raised -itself -stay -true -studio -1988 
-sports -replaced -paris -systems -saint -leader -theatre -whose -market -capital -parents -spanish -canadian -earth -##ity -cut -degree -writing -bay -christian -awarded -natural -higher -bill -##as -coast -provided -previous -senior -ft -valley -organization -stopped -onto -countries -parts -conference -queen -security -interest -saying -allowed -master -earlier -phone -matter -smith -winning -try -happened -moving -campaign -los -##ley -breath -nearly -mid -1987 -certain -girls -date -italian -african -standing -fell -artist -##ted -shows -deal -mine -industry -1986 -##ng -everyone -republic -provide -collection -library -student -##ville -primary -owned -older -via -heavy -1st -makes -##able -attention -anyone -africa -##ri -stated -length -ended -fingers -command -staff -skin -foreign -opening -governor -okay -medal -kill -sun -cover -job -1985 -introduced -chest -hell -feeling -##ies -success -meet -reason -standard -meeting -novel -1984 -trade -source -buildings -##land -rose -guy -goal -##ur -chapter -native -husband -previously -unit -limited -entered -weeks -producer -operations -mountain -takes -covered -forced -related -roman -complete -successful -key -texas -cold -##ya -channel -1980 -traditional -films -dance -clear -approximately -500 -nine -van -prince -question -active -tracks -ireland -regional -silver -author -personal -sense -operation -##ine -economic -1983 -holding -twenty -isbn -additional -speed -hour -edition -regular -historic -places -whom -shook -movie -km² -secretary -prior -report -chicago -read -foundation -view -engine -scored -1982 -units -ask -airport -property -ready -immediately -lady -month -listed -contract -##de -manager -themselves -lines -##ki -navy -writer -meant -##ts -runs -##ro -practice -championships -singer -glass -commission -required -forest -starting -culture -generally -giving -access -attended -test -couple -stand -catholic -martin -caught -executive -##less -eye -##ey -thinking -chair -quite -shoulder -1979 -hope -decision -plays -defeated -municipality -whether -structure -offered -slowly -pain -ice -direction -##ion -paper -mission -1981 -mostly -200 -noted -individual -managed -nature -lives -plant -##ha -helped -except -studied -computer -figure -relationship -issue -significant -loss -die -smiled -gun -ago -highest -1972 -##am -male -bring -goals -mexico -problem -distance -commercial -completely -location -annual -famous -drive -1976 -neck -1978 -surface -caused -italy -understand -greek -highway -wrong -hotel -comes -appearance -joseph -double -issues -musical -companies -castle -income -review -assembly -bass -initially -parliament -artists -experience -1974 -particular -walk -foot -engineering -talking -window -dropped -##ter -miss -baby -boys -break -1975 -stars -edge -remember -policy -carried -train -stadium -bar -sex -angeles -evidence -##ge -becoming -assistant -soviet -1977 -upper -step -wing -1970 -youth -financial -reach -##ll -actor -numerous -##se -##st -nodded -arrived -##ation -minute -##nt -believed -sorry -complex -beautiful -victory -associated -temple -1968 -1973 -chance -perhaps -metal -##son -1945 -bishop -##et -lee -launched -particularly -tree -le -retired -subject -prize -contains -yeah -theory -empire -##ce -suddenly -waiting -trust -recording -##to -happy -terms -camp -champion -1971 -religious -pass -zealand -names -2nd -port -ancient -tom -corner -represented -watch -legal -anti -justice -cause -watched -brothers -45 -material -changes -simply -response -louis -fast -##ting -answer -60 -historical 
-1969 -stories -straight -create -feature -increased -rate -administration -virginia -el -activities -cultural -overall -winner -programs -basketball -legs -guard -beyond -cast -doctor -mm -flight -results -remains -cost -effect -winter -##ble -larger -islands -problems -chairman -grew -commander -isn -1967 -pay -failed -selected -hurt -fort -box -regiment -majority -journal -35 -edward -plans -##ke -##ni -shown -pretty -irish -characters -directly -scene -likely -operated -allow -spring -##j -junior -matches -looks -mike -houses -fellow -##tion -beach -marriage -##ham -##ive -rules -oil -65 -florida -expected -nearby -congress -sam -peace -recent -iii -wait -subsequently -cell -##do -variety -serving -agreed -please -poor -joe -pacific -attempt -wood -democratic -piece -prime -##ca -rural -mile -touch -appears -township -1964 -1966 -soldiers -##men -##ized -1965 -pennsylvania -closer -fighting -claimed -score -jones -physical -editor -##ous -filled -genus -specific -sitting -super -mom -##va -therefore -supported -status -fear -cases -store -meaning -wales -minor -spain -tower -focus -vice -frank -follow -parish -separate -golden -horse -fifth -remaining -branch -32 -presented -stared -##id -uses -secret -forms -##co -baseball -exactly -##ck -choice -note -discovered -travel -composed -truth -russia -ball -color -kiss -dad -wind -continue -ring -referred -numbers -digital -greater -##ns -metres -slightly -direct -increase -1960 -responsible -crew -rule -trees -troops -##no -broke -goes -individuals -hundred -weight -creek -sleep -memory -defense -provides -ordered -code -value -jewish -windows -1944 -safe -judge -whatever -corps -realized -growing -pre -##ga -cities -alexander -gaze -lies -spread -scott -letter -showed -situation -mayor -transport -watching -workers -extended -##li -expression -normal -##ment -chart -multiple -border -##ba -host -##ner -daily -mrs -walls -piano -##ko -heat -cannot -##ate -earned -products -drama -era -authority -seasons -join -grade -##io -sign -difficult -machine -1963 -territory -mainly -##wood -stations -squadron -1962 -stepped -iron -19th -##led -serve -appear -sky -speak -broken -charge -knowledge -kilometres -removed -ships -article -campus -simple -##ty -pushed -britain -##ve -leaves -recently -cd -soft -boston -latter -easy -acquired -poland -##sa -quality -officers -presence -planned -nations -mass -broadcast -jean -share -image -influence -wild -offer -emperor -electric -reading -headed -ability -promoted -yellow -ministry -1942 -throat -smaller -politician -##by -latin -spoke -cars -williams -males -lack -pop -80 -##ier -acting -seeing -consists -##ti -estate -1961 -pressure -johnson -newspaper -jr -chris -olympics -online -conditions -beat -elements -walking -vote -##field -needs -carolina -text -featuring -global -block -shirt -levels -francisco -purpose -females -et -dutch -duke -ahead -gas -twice -safety -serious -turning -highly -lieutenant -firm -maria -amount -mixed -daniel -proposed -perfect -agreement -affairs -3rd -seconds -contemporary -paid -1943 -prison -save -kitchen -label -administrative -intended -constructed -academic -nice -teacher -races -1956 -formerly -corporation -ben -nation -issued -shut -1958 -drums -housing -victoria -seems -opera -1959 -graduated -function -von -mentioned -picked -build -recognized -shortly -protection -picture -notable -exchange -elections -1980s -loved -percent -racing -fish -elizabeth -garden -volume -hockey -1941 -beside -settled -##ford -1940 -competed -replied -drew -1948 -actress -marine 
- … (remaining removed lines of a WordPiece-style vocabulary file: several thousand tokens, mostly common English words, ##-prefixed subword pieces, years, and proper nouns, one per line in the original diff)
-quarterly -asteroid -cleaned -radius -temper -##llen -telugu -jerk -viscount -menu -##ote -glimpse -##aya -yacht -hawaiian -baden -##rl -laptop -readily -##gu -monetary -offshore -scots -watches -##yang -##arian -upgrade -needle -xbox -lea -encyclopedia -flank -fingertips -##pus -delight -teachings -confirm -roth -beaches -midway -winters -##iah -teasing -daytime -beverly -gambling -bonnie -##backs -regulated -clement -hermann -tricks -knot -##shing -##uring -##vre -detached -ecological -owed -specialty -byron -inventor -bats -stays -screened -unesco -midland -trim -affection -##ander -##rry -jess -thoroughly -feedback -##uma -chennai -strained -heartbeat -wrapping -overtime -pleaded -##sworth -mon -leisure -oclc -##tate -##ele -feathers -angelo -thirds -nuts -surveys -clever -gill -commentator -##dos -darren -rides -gibraltar -##nc -##mu -dissolution -dedication -shin -meals -saddle -elvis -reds -chaired -taller -appreciation -functioning -niece -favored -advocacy -robbie -criminals -suffolk -yugoslav -passport -constable -congressman -hastings -vera -##rov -consecrated -sparks -ecclesiastical -confined -##ovich -muller -floyd -nora -1822 -paved -1827 -cumberland -ned -saga -spiral -##flow -appreciated -yi -collaborative -treating -similarities -feminine -finishes -##ib -jade -import -##nse -##hot -champagne -mice -securing -celebrities -helsinki -attributes -##gos -cousins -phases -ache -lucia -gandhi -submission -vicar -spear -shine -tasmania -biting -detention -constitute -tighter -seasonal -##gus -terrestrial -matthews -##oka -effectiveness -parody -philharmonic -##onic -1816 -strangers -encoded -consortium -guaranteed -regards -shifts -tortured -collision -supervisor -inform -broader -insight -theaters -armour -emeritus -blink -incorporates -mapping -##50 -##ein -handball -flexible -##nta -substantially -generous -thief -##own -carr -loses -1793 -prose -ucla -romeo -generic -metallic -realization -damages -mk -commissioners -zach -default -##ther -helicopters -lengthy -stems -spa -partnered -spectators -rogue -indication -penalties -teresa -1801 -sen -##tric -dalton -##wich -irving -photographic -##vey -dell -deaf -peters -excluded -unsure -##vable -patterson -crawled -##zio -resided -whipped -latvia -slower -ecole -pipes -employers -maharashtra -comparable -va -textile -pageant -##gel -alphabet -binary -irrigation -chartered -choked -antoine -offs -waking -supplement -##wen -quantities -demolition -regain -locate -urdu -folks -alt -114 -##mc -scary -andreas -whites -##ava -classrooms -mw -aesthetic -publishes -valleys -guides -cubs -johannes -bryant -conventions -affecting -##itt -drain -awesome -isolation -prosecutor -ambitious -apology -captive -downs -atmospheric -lorenzo -aisle -beef -foul -##onia -kidding -composite -disturbed -illusion -natives -##ffer -emi -rockets -riverside -wartime -painters -adolf -melted -##ail -uncertainty -simulation -hawks -progressed -meantime -builder -spray -breach -unhappy -regina -russians -##urg -determining -##tation -tram -1806 -##quin -aging -##12 -1823 -garion -rented -mister -diaz -terminated -clip -1817 -depend -nervously -disco -owe -defenders -shiva -notorious -disbelief -shiny -worcester -##gation -##yr -trailing -undertook -islander -belarus -limitations -watershed -fuller -overlooking -utilized -raphael -1819 -synthetic -breakdown -klein -##nate -moaned -memoir -lamb -practicing -##erly -cellular -arrows -exotic -##graphy -witches -117 -charted -rey -hut -hierarchy -subdivision -freshwater -giuseppe -aloud -reyes -qatar -marty 
-sideways -utterly -sexually -jude -prayers -mccarthy -softball -blend -damien -##gging -##metric -wholly -erupted -lebanese -negro -revenues -tasted -comparative -teamed -transaction -labeled -maori -sovereignty -parkway -trauma -gran -malay -121 -advancement -descendant -2020 -buzz -salvation -inventory -symbolic -##making -antarctica -mps -##gas -##bro -mohammed -myanmar -holt -submarines -tones -##lman -locker -patriarch -bangkok -emerson -remarks -predators -kin -afghan -confession -norwich -rental -emerge -advantages -##zel -rca -##hold -shortened -storms -aidan -##matic -autonomy -compliance -##quet -dudley -atp -##osis -1803 -motto -documentation -summary -professors -spectacular -christina -archdiocese -flashing -innocence -remake -##dell -psychic -reef -scare -employ -rs -sticks -meg -gus -leans -##ude -accompany -bergen -tomas -##iko -doom -wages -pools -##nch -##bes -breasts -scholarly -alison -outline -brittany -breakthrough -willis -realistic -##cut -##boro -competitor -##stan -pike -picnic -icon -designing -commercials -washing -villain -skiing -micro -costumes -auburn -halted -executives -##hat -logistics -cycles -vowel -applicable -barrett -exclaimed -eurovision -eternity -ramon -##umi -##lls -modifications -sweeping -disgust -##uck -torch -aviv -ensuring -rude -dusty -sonic -donovan -outskirts -cu -pathway -##band -##gun -##lines -disciplines -acids -cadet -paired -##40 -sketches -##sive -marriages -##⁺ -folding -peers -slovak -implies -admired -##beck -1880s -leopold -instinct -attained -weston -megan -horace -##ination -dorsal -ingredients -evolutionary -##its -complications -deity -lethal -brushing -levy -deserted -institutes -posthumously -delivering -telescope -coronation -motivated -rapids -luc -flicked -pays -volcano -tanner -weighed -##nica -crowds -frankie -gifted -addressing -granddaughter -winding -##rna -constantine -gomez -##front -landscapes -rudolf -anthropology -slate -werewolf -##lio -astronomy -circa -rouge -dreaming -sack -knelt -drowned -naomi -prolific -tracked -freezing -herb -##dium -agony -randall -twisting -wendy -deposit -touches -vein -wheeler -##bbled -##bor -batted -retaining -tire -presently -compare -specification -daemon -nigel -##grave -merry -recommendation -czechoslovakia -sandra -ng -roma -##sts -lambert -inheritance -sheikh -winchester -cries -examining -##yle -comeback -cuisine -nave -##iv -ko -retrieve -tomatoes -barker -polished -defining -irene -lantern -personalities -begging -tract -swore -1809 -175 -##gic -omaha -brotherhood -##rley -haiti -##ots -exeter -##ete -##zia -steele -dumb -pearson -210 -surveyed -elisabeth -trends -##ef -fritz -##rf -premium -bugs -fraction -calmly -viking -##birds -tug -inserted -unusually -##ield -confronted -distress -crashing -brent -turks -resign -##olo -cambodia -gabe -sauce -##kal -evelyn -116 -extant -clusters -quarry -teenagers -luna -##lers -##ister -affiliation -drill -##ashi -panthers -scenic -libya -anita -strengthen -inscriptions -##cated -lace -sued -judith -riots -##uted -mint -##eta -preparations -midst -dub -challenger -##vich -mock -cf -displaced -wicket -breaths -enables -schmidt -analyst -##lum -ag -highlight -automotive -axe -josef -newark -sufficiently -resembles -50th -##pal -flushed -mum -traits -##ante -commodore -incomplete -warming -titular -ceremonial -ethical -118 -celebrating -eighteenth -cao -lima -medalist -mobility -strips -snakes -##city -miniature -zagreb -barton -escapes -umbrella -automated -doubted -differs -cooled -georgetown -dresden -cooked -fade -wyatt -rna 
-jacobs -carlton -abundant -stereo -boost -madras -inning -##hia -spur -ip -malayalam -begged -osaka -groan -escaping -charging -dose -vista -##aj -bud -papa -communists -advocates -edged -tri -##cent -resemble -peaking -necklace -fried -montenegro -saxony -goose -glances -stuttgart -curator -recruit -grocery -sympathetic -##tting -##fort -127 -lotus -randolph -ancestor -##rand -succeeding -jupiter -1798 -macedonian -##heads -hiking -1808 -handing -fischer -##itive -garbage -node -##pies -prone -singular -papua -inclined -attractions -italia -pouring -motioned -grandma -garnered -jacksonville -corp -ego -ringing -aluminum -##hausen -ordering -##foot -drawer -traders -synagogue -##play -##kawa -resistant -wandering -fragile -fiona -teased -var -hardcore -soaked -jubilee -decisive -exposition -mercer -poster -valencia -hale -kuwait -1811 -##ises -##wr -##eed -tavern -gamma -122 -johan -##uer -airways -amino -gil -##ury -vocational -domains -torres -##sp -generator -folklore -outcomes -##keeper -canberra -shooter -fl -beams -confrontation -##lling -##gram -feb -aligned -forestry -pipeline -jax -motorway -conception -decay -##tos -coffin -##cott -stalin -1805 -escorted -minded -##nam -sitcom -purchasing -twilight -veronica -additions -passive -tensions -straw -123 -frequencies -1804 -refugee -cultivation -##iate -christie -clary -bulletin -crept -disposal -##rich -##zong -processor -crescent -##rol -bmw -emphasized -whale -nazis -aurora -##eng -dwelling -hauled -sponsors -toledo -mega -ideology -theatres -tessa -cerambycidae -saves -turtle -cone -suspects -kara -rusty -yelling -greeks -mozart -shades -cocked -participant -##tro -shire -spit -freeze -necessity -##cos -inmates -nielsen -councillors -loaned -uncommon -omar -peasants -botanical -offspring -daniels -formations -jokes -1794 -pioneers -sigma -licensing -##sus -wheelchair -polite -1807 -liquor -pratt -trustee -##uta -forewings -balloon -##zz -kilometre -camping -explicit -casually -shawn -foolish -teammates -nm -hassan -carrie -judged -satisfy -vanessa -knives -selective -cnn -flowed -##lice -eclipse -stressed -eliza -mathematician -cease -cultivated -##roy -commissions -browns -##ania -destroyers -sheridan -meadow -##rius -minerals -##cial -downstream -clash -gram -memoirs -ventures -baha -seymour -archie -midlands -edith -fare -flynn -invite -canceled -tiles -stabbed -boulder -incorporate -amended -camden -facial -mollusk -unreleased -descriptions -yoga -grabs -550 -raises -ramp -shiver -##rose -coined -pioneering -tunes -qing -warwick -tops -119 -melanie -giles -##rous -wandered -##inal -annexed -nov -30th -unnamed -##ished -organizational -airplane -normandy -stoke -whistle -blessing -violations -chased -holders -shotgun -##ctic -outlet -reactor -##vik -tires -tearing -shores -fortified -mascot -constituencies -nc -columnist -productive -tibet -##rta -lineage -hooked -oct -tapes -judging -cody -##gger -hansen -kashmir -triggered -##eva -solved -cliffs -##tree -resisted -anatomy -protesters -transparent -implied -##iga -injection -mattress -excluding -##mbo -defenses -helpless -devotion -##elli -growl -liberals -weber -phenomena -atoms -plug -##iff -mortality -apprentice -howe -convincing -aaa -swimmer -barber -leone -promptly -sodium -def -nowadays -arise -##oning -gloucester -corrected -dignity -norm -erie -##ders -elders -evacuated -sylvia -compression -##yar -hartford -pose -backpack -reasoning -accepts -24th -wipe -millimetres -marcel -##oda -dodgers -albion -1790 -overwhelmed -aerospace -oaks -1795 -showcase -acknowledge 
-recovering -nolan -ashe -hurts -geology -fashioned -disappearance -farewell -swollen -shrug -marquis -wimbledon -124 -rue -1792 -commemorate -reduces -experiencing -inevitable -calcutta -intel -##court -murderer -sticking -fisheries -imagery -bloom -280 -brake -##inus -gustav -hesitation -memorable -po -viral -beans -accidents -tunisia -antenna -spilled -consort -treatments -aye -perimeter -##gard -donation -hostage -migrated -banker -addiction -apex -lil -trout -##ously -conscience -##nova -rams -sands -genome -passionate -troubles -##lets -##set -amid -##ibility -##ret -higgins -exceed -vikings -##vie -payne -##zan -muscular -##ste -defendant -sucking -##wal -ibrahim -fuselage -claudia -vfl -europeans -snails -interval -##garh -preparatory -statewide -tasked -lacrosse -viktor -##lation -angola -##hra -flint -implications -employs -teens -patrons -stall -weekends -barriers -scrambled -nucleus -tehran -jenna -parsons -lifelong -robots -displacement -5000 -##bles -precipitation -##gt -knuckles -clutched -1802 -marrying -ecology -marx -accusations -declare -scars -kolkata -mat -meadows -bermuda -skeleton -finalists -vintage -crawl -coordinate -affects -subjected -orchestral -mistaken -##tc -mirrors -dipped -relied -260 -arches -candle -##nick -incorporating -wildly -fond -basilica -owl -fringe -rituals -whispering -stirred -feud -tertiary -slick -goat -honorable -whereby -skip -ricardo -stripes -parachute -adjoining -submerged -synthesizer -##gren -intend -positively -ninety -phi -beaver -partition -fellows -alexis -prohibition -carlisle -bizarre -fraternity -##bre -doubts -icy -cbc -aquatic -sneak -sonny -combines -airports -crude -supervised -spatial -merge -alfonso -##bic -corrupt -scan -undergo -##ams -disabilities -colombian -comparing -dolphins -perkins -##lish -reprinted -unanimous -bounced -hairs -underworld -midwest -semester -bucket -paperback -miniseries -coventry -demise -##leigh -demonstrations -sensor -rotating -yan -##hler -arrange -soils -##idge -hyderabad -labs -##dr -brakes -grandchildren -##nde -negotiated -rover -ferrari -continuation -directorate -augusta -stevenson -counterpart -gore -##rda -nursery -rican -ave -collectively -broadly -pastoral -repertoire -asserted -discovering -nordic -styled -fiba -cunningham -harley -middlesex -survives -tumor -tempo -zack -aiming -lok -urgent -##rade -##nto -devils -##ement -contractor -turin -##wl -##ool -bliss -repaired -simmons -moan -astronomical -cr -negotiate -lyric -1890s -lara -bred -clad -angus -pbs -##ience -engineered -posed -##lk -hernandez -possessions -elbows -psychiatric -strokes -confluence -electorate -lifts -campuses -lava -alps -##ep -##ution -##date -physicist -woody -##page -##ographic -##itis -juliet -reformation -sparhawk -320 -complement -suppressed -jewel -##½ -floated -##kas -continuity -sadly -##ische -inability -melting -scanning -paula -flour -judaism -safer -vague -##lm -solving -curb -##stown -financially -gable -bees -expired -miserable -cassidy -dominion -1789 -cupped -145 -robbery -facto -amos -warden -resume -tallest -marvin -ing -pounded -usd -declaring -gasoline -##aux -darkened -270 -650 -sophomore -##mere -erection -gossip -televised -risen -dial -##eu -pillars -##link -passages -profound -##tina -arabian -ashton -silicon -nail -##ead -##lated -##wer -##hardt -fleming -firearms -ducked -circuits -blows -waterloo -titans -##lina -atom -fireplace -cheshire -financed -activation -algorithms -##zzi -constituent -catcher -cherokee -partnerships -sexuality -platoon -tragic -vivian -guarded -whiskey 
-meditation -poetic -##late -##nga -##ake -porto -listeners -dominance -kendra -mona -chandler -factions -22nd -salisbury -attitudes -derivative -##ido -##haus -intake -paced -javier -illustrator -barrels -bias -cockpit -burnett -dreamed -ensuing -##anda -receptors -someday -hawkins -mattered -##lal -slavic -1799 -jesuit -cameroon -wasted -tai -wax -lowering -victorious -freaking -outright -hancock -librarian -sensing -bald -calcium -myers -tablet -announcing -barack -shipyard -pharmaceutical -##uan -greenwich -flush -medley -patches -wolfgang -pt -speeches -acquiring -exams -nikolai -##gg -hayden -kannada -##type -reilly -##pt -waitress -abdomen -devastated -capped -pseudonym -pharmacy -fulfill -paraguay -1796 -clicked -##trom -archipelago -syndicated -##hman -lumber -orgasm -rejection -clifford -lorraine -advent -mafia -rodney -brock -##ght -##used -##elia -cassette -chamberlain -despair -mongolia -sensors -developmental -upstream -##eg -##alis -spanning -165 -trombone -basque -seeded -interred -renewable -rhys -leapt -revision -molecule -##ages -chord -vicious -nord -shivered -23rd -arlington -debts -corpus -sunrise -bays -blackburn -centimetres -##uded -shuddered -gm -strangely -gripping -cartoons -isabelle -orbital -##ppa -seals -proving -##lton -refusal -strengthened -bust -assisting -baghdad -batsman -portrayal -mara -pushes -spears -og -##cock -reside -nathaniel -brennan -1776 -confirmation -caucus -##worthy -markings -yemen -nobles -ku -lazy -viewer -catalan -encompasses -sawyer -##fall -sparked -substances -patents -braves -arranger -evacuation -sergio -persuade -dover -tolerance -penguin -cum -jockey -insufficient -townships -occupying -declining -plural -processed -projection -puppet -flanders -introduces -liability -##yon -gymnastics -antwerp -taipei -hobart -candles -jeep -wes -observers -126 -chaplain -bundle -glorious -##hine -hazel -flung -sol -excavations -dumped -stares -sh -bangalore -triangular -icelandic -intervals -expressing -turbine -##vers -songwriting -crafts -##igo -jasmine -ditch -rite -##ways -entertaining -comply -sorrow -wrestlers -basel -emirates -marian -rivera -helpful -##some -caution -downward -networking -##atory -##tered -darted -genocide -emergence -replies -specializing -spokesman -convenient -unlocked -fading -augustine -concentrations -resemblance -elijah -investigator -andhra -##uda -promotes -bean -##rrell -fleeing -wan -simone -announcer -##ame -##bby -lydia -weaver -132 -residency -modification -##fest -stretches -##ast -alternatively -nat -lowe -lacks -##ented -pam -tile -concealed -inferior -abdullah -residences -tissues -vengeance -##ided -moisture -peculiar -groove -zip -bologna -jennings -ninja -oversaw -zombies -pumping -batch -livingston -emerald -installations -1797 -peel -nitrogen -rama -##fying -##star -schooling -strands -responding -werner -##ost -lime -casa -accurately -targeting -##rod -underway -##uru -hemisphere -lester -##yard -occupies -2d -griffith -angrily -reorganized -##owing -courtney -deposited -##dd -##30 -estadio -##ifies -dunn -exiled -##ying -checks -##combe -##о -##fly -successes -unexpectedly -blu -assessed -##flower -##ه -observing -sacked -spiders -kn -##tail -mu -nodes -prosperity -audrey -divisional -155 -broncos -tangled -adjust -feeds -erosion -paolo -surf -directory -snatched -humid -admiralty -screwed -gt -reddish -##nese -modules -trench -lamps -bind -leah -bucks -competes -##nz -##form -transcription -##uc -isles -violently -clutching -pga -cyclist -inflation -flats -ragged -unnecessary -##hian -stubborn 
-coordinated -harriet -baba -disqualified -330 -insect -wolfe -##fies -reinforcements -rocked -duel -winked -embraced -bricks -##raj -hiatus -defeats -pending -brightly -jealousy -##xton -##hm -##uki -lena -gdp -colorful -##dley -stein -kidney -##shu -underwear -wanderers -##haw -##icus -guardians -m³ -roared -habits -##wise -permits -gp -uranium -punished -disguise -bundesliga -elise -dundee -erotic -partisan -pi -collectors -float -individually -rendering -behavioral -bucharest -ser -hare -valerie -corporal -nutrition -proportional -##isa -immense -##kis -pavement -##zie -##eld -sutherland -crouched -1775 -##lp -suzuki -trades -endurance -operas -crosby -prayed -priory -rory -socially -##urn -gujarat -##pu -walton -cube -pasha -privilege -lennon -floods -thorne -waterfall -nipple -scouting -approve -##lov -minorities -voter -dwight -extensions -assure -ballroom -slap -dripping -privileges -rejoined -confessed -demonstrating -patriotic -yell -investor -##uth -pagan -slumped -squares -##cle -##kins -confront -bert -embarrassment -##aid -aston -urging -sweater -starr -yuri -brains -williamson -commuter -mortar -structured -selfish -exports -##jon -cds -##him -unfinished -##rre -mortgage -destinations -##nagar -canoe -solitary -buchanan -delays -magistrate -fk -##pling -motivation -##lier -##vier -recruiting -assess -##mouth -malik -antique -1791 -pius -rahman -reich -tub -zhou -smashed -airs -galway -xii -conditioning -honduras -discharged -dexter -##pf -lionel -129 -debates -lemon -tiffany -volunteered -dom -dioxide -procession -devi -sic -tremendous -advertisements -colts -transferring -verdict -hanover -decommissioned -utter -relate -pac -racism -##top -beacon -limp -similarity -terra -occurrence -ant -##how -becky -capt -updates -armament -richie -pal -##graph -halloween -mayo -##ssen -##bone -cara -serena -fcc -dolls -obligations -##dling -violated -lafayette -jakarta -exploitation -##ime -infamous -iconic -##lah -##park -kitty -moody -reginald -dread -spill -crystals -olivier -modeled -bluff -equilibrium -separating -notices -ordnance -extinction -onset -cosmic -attachment -sammy -expose -privy -anchored -##bil -abbott -admits -bending -baritone -emmanuel -policeman -vaughan -winged -climax -dresses -denny -polytechnic -mohamed -burmese -authentic -nikki -genetics -grandparents -homestead -gaza -postponed -metacritic -una -##sby -##bat -unstable -dissertation -##rial -##cian -curls -obscure -uncovered -bronx -praying -disappearing -##hoe -prehistoric -coke -turret -mutations -nonprofit -pits -monaco -##ي -##usion -prominently -dispatched -podium -##mir -uci -##uation -133 -fortifications -birthplace -kendall -##lby -##oll -preacher -rack -goodman -##rman -persistent -##ott -countless -jaime -recorder -lexington -persecution -jumps -renewal -wagons -##11 -crushing -##holder -decorations -##lake -abundance -wrath -laundry -£1 -garde -##rp -jeanne -beetles -peasant -##sl -splitting -caste -sergei -##rer -##ema -scripts -##ively -rub -satellites -##vor -inscribed -verlag -scrapped -gale -packages -chick -potato -slogan -kathleen -arabs -##culture -counterparts -reminiscent -choral -##tead -rand -retains -bushes -dane -accomplish -courtesy -closes -##oth -slaughter -hague -krakow -lawson -tailed -elias -ginger -##ttes -canopy -betrayal -rebuilding -turf -##hof -frowning -allegiance -brigades -kicks -rebuild -polls -alias -nationalism -td -rowan -audition -bowie -fortunately -recognizes -harp -dillon -horrified -##oro -renault -##tics -ropes -##α -presumed -rewarded -infrared -wiping 
-accelerated -illustration -##rid -presses -practitioners -badminton -##iard -detained -##tera -recognizing -relates -misery -##sies -##tly -reproduction -piercing -potatoes -thornton -esther -manners -hbo -##aan -ours -bullshit -ernie -perennial -sensitivity -illuminated -rupert -##jin -##iss -##ear -rfc -nassau -##dock -staggered -socialism -##haven -appointments -nonsense -prestige -sharma -haul -##tical -solidarity -gps -##ook -##rata -igor -pedestrian -##uit -baxter -tenants -wires -medication -unlimited -guiding -impacts -diabetes -##rama -sasha -pas -clive -extraction -131 -continually -constraints -##bilities -sonata -hunted -sixteenth -chu -planting -quote -mayer -pretended -abs -spat -##hua -ceramic -##cci -curtains -pigs -pitching -##dad -latvian -sore -dayton -##sted -##qi -patrols -slice -playground -##nted -shone -stool -apparatus -inadequate -mates -treason -##ija -desires -##liga -##croft -somalia -laurent -mir -leonardo -oracle -grape -obliged -chevrolet -thirteenth -stunning -enthusiastic -##ede -accounted -concludes -currents -basil -##kovic -drought -##rica -mai -##aire -shove -posting -##shed -pilgrimage -humorous -packing -fry -pencil -wines -smells -144 -marilyn -aching -newest -clung -bon -neighbours -sanctioned -##pie -mug -##stock -drowning -##mma -hydraulic -##vil -hiring -reminder -lilly -investigators -##ncies -sour -##eous -compulsory -packet -##rion -##graphic -##elle -cannes -##inate -depressed -##rit -heroic -importantly -theresa -##tled -conway -saturn -marginal -rae -##xia -corresponds -royce -pact -jasper -explosives -packaging -aluminium -##ttered -denotes -rhythmic -spans -assignments -hereditary -outlined -originating -sundays -lad -reissued -greeting -beatrice -##dic -pillar -marcos -plots -handbook -alcoholic -judiciary -avant -slides -extract -masculine -blur -##eum -##force -homage -trembled -owens -hymn -trey -omega -signaling -socks -accumulated -reacted -attic -theo -lining -angie -distraction -primera -talbot -##key -1200 -ti -creativity -billed -##hey -deacon -eduardo -identifies -proposition -dizzy -gunner -hogan -##yam -##pping -##hol -ja -##chan -jensen -reconstructed -##berger -clearance -darius -##nier -abe -harlem -plea -dei -circled -emotionally -notation -fascist -neville -exceeded -upwards -viable -ducks -##fo -workforce -racer -limiting -shri -##lson -possesses -1600 -kerr -moths -devastating -laden -disturbing -locking -##cture -gal -fearing -accreditation -flavor -aide -1870s -mountainous -##baum -melt -##ures -motel -texture -servers -soda -##mb -herd -##nium -erect -puzzled -hum -peggy -examinations -gould -testified -geoff -ren -devised -sacks -##law -denial -posters -grunted -cesar -tutor -ec -gerry -offerings -byrne -falcons -combinations -ct -incoming -pardon -rocking -26th -avengers -flared -mankind -seller -uttar -loch -nadia -stroking -exposing -##hd -fertile -ancestral -instituted -##has -noises -prophecy -taxation -eminent -vivid -pol -##bol -dart -indirect -multimedia -notebook -upside -displaying -adrenaline -referenced -geometric -##iving -progression -##ddy -blunt -announce -##far -implementing -##lav -aggression -liaison -cooler -cares -headache -plantations -gorge -dots -impulse -thickness -ashamed -averaging -kathy -obligation -precursor -137 -fowler -symmetry -thee -225 -hears -##rai -undergoing -ads -butcher -bowler -##lip -cigarettes -subscription -goodness -##ically -browne -##hos -##tech -kyoto -donor -##erty -damaging -friction -drifting -expeditions -hardened -prostitution -152 -fauna -blankets -claw 
-tossing -snarled -butterflies -recruits -investigative -coated -healed -138 -communal -hai -xiii -academics -boone -psychologist -restless -lahore -stephens -mba -brendan -foreigners -printer -##pc -ached -explode -27th -deed -scratched -dared -##pole -cardiac -1780 -okinawa -proto -commando -compelled -oddly -electrons -##base -replica -thanksgiving -##rist -sheila -deliberate -stafford -tidal -representations -hercules -ou -##path -##iated -kidnapping -lenses -##tling -deficit -samoa -mouths -consuming -computational -maze -granting -smirk -razor -fixture -ideals -inviting -aiden -nominal -##vs -issuing -julio -pitt -ramsey -docks -##oss -exhaust -##owed -bavarian -draped -anterior -mating -ethiopian -explores -noticing -##nton -discarded -convenience -hoffman -endowment -beasts -cartridge -mormon -paternal -probe -sleeves -interfere -lump -deadline -##rail -jenks -bulldogs -scrap -alternating -justified -reproductive -nam -seize -descending -secretariat -kirby -coupe -grouped -smash -panther -sedan -tapping -##18 -lola -cheer -germanic -unfortunate -##eter -unrelated -##fan -subordinate -##sdale -suzanne -advertisement -##ility -horsepower -##lda -cautiously -discourse -luigi -##mans -##fields -noun -prevalent -mao -schneider -everett -surround -governorate -kira -##avia -westward -##take -misty -rails -sustainability -134 -unused -##rating -packs -toast -unwilling -regulate -thy -suffrage -nile -awe -assam -definitions -travelers -affordable -##rb -conferred -sells -undefeated -beneficial -torso -basal -repeating -remixes -##pass -bahrain -cables -fang -##itated -excavated -numbering -statutory -##rey -deluxe -##lian -forested -ramirez -derbyshire -zeus -slamming -transfers -astronomer -banana -lottery -berg -histories -bamboo -##uchi -resurrection -posterior -bowls -vaguely -##thi -thou -preserving -tensed -offence -##inas -meyrick -callum -ridden -watt -langdon -tying -lowland -snorted -daring -truman -##hale -##girl -aura -overly -filing -weighing -goa -infections -philanthropist -saunders -eponymous -##owski -latitude -perspectives -reviewing -mets -commandant -radial -##kha -flashlight -reliability -koch -vowels -amazed -ada -elaine -supper -##rth -##encies -predator -debated -soviets -cola -##boards -##nah -compartment -crooked -arbitrary -fourteenth -##ctive -havana -majors -steelers -clips -profitable -ambush -exited -packers -##tile -nude -cracks -fungi -##е -limb -trousers -josie -shelby -tens -frederic -##ος -definite -smoothly -constellation -insult -baton -discs -lingering -##nco -conclusions -lent -staging -becker -grandpa -shaky -##tron -einstein -obstacles -sk -adverse -elle -economically -##moto -mccartney -thor -dismissal -motions -readings -nostrils -treatise -##pace -squeezing -evidently -prolonged -1783 -venezuelan -je -marguerite -beirut -takeover -shareholders -##vent -denise -digit -airplay -norse -##bbling -imaginary -pills -hubert -blaze -vacated -eliminating -##ello -vine -mansfield -##tty -retrospective -barrow -borne -clutch -bail -forensic -weaving -##nett -##witz -desktop -citadel -promotions -worrying -dorset -ieee -subdivided -##iating -manned -expeditionary -pickup -synod -chuckle -185 -barney -##rz -##ffin -functionality -karachi -litigation -meanings -uc -lick -turbo -anders -##ffed -execute -curl -oppose -ankles -typhoon -##د -##ache -##asia -linguistics -compassion -pressures -grazing -perfection -##iting -immunity -monopoly -muddy -backgrounds -136 -namibia -francesca -monitors -attracting -stunt -tuition -##ии -vegetable -##mates -##quent -mgm 
-jen -complexes -forts -##ond -cellar -bites -seventeenth -royals -flemish -failures -mast -charities -##cular -peruvian -capitals -macmillan -ipswich -outward -frigate -postgraduate -folds -employing -##ouse -concurrently -fiery -##tai -contingent -nightmares -monumental -nicaragua -##kowski -lizard -mal -fielding -gig -reject -##pad -harding -##ipe -coastline -##cin -##nos -beethoven -humphrey -innovations -##tam -##nge -norris -doris -solicitor -huang -obey -141 -##lc -niagara -##tton -shelves -aug -bourbon -curry -nightclub -specifications -hilton -##ndo -centennial -dispersed -worm -neglected -briggs -sm -font -kuala -uneasy -plc -##nstein -##bound -##aking -##burgh -awaiting -pronunciation -##bbed -##quest -eh -optimal -zhu -raped -greens -presided -brenda -worries -##life -venetian -marxist -turnout -##lius -refined -braced -sins -grasped -sunderland -nickel -speculated -lowell -cyrillic -communism -fundraising -resembling -colonists -mutant -freddie -usc -##mos -gratitude -##run -mural -##lous -chemist -wi -reminds -28th -steals -tess -pietro -##ingen -promoter -ri -microphone -honoured -rai -sant -##qui -feather -##nson -burlington -kurdish -terrorists -deborah -sickness -##wed -##eet -hazard -irritated -desperation -veil -clarity -##rik -jewels -xv -##gged -##ows -##cup -berkshire -unfair -mysteries -orchid -winced -exhaustion -renovations -stranded -obe -infinity -##nies -adapt -redevelopment -thanked -registry -olga -domingo -noir -tudor -ole -##atus -commenting -behaviors -##ais -crisp -pauline -probable -stirling -wigan -##bian -paralympics -panting -surpassed -##rew -luca -barred -pony -famed -##sters -cassandra -waiter -carolyn -exported -##orted -andres -destructive -deeds -jonah -castles -vacancy -suv -##glass -1788 -orchard -yep -famine -belarusian -sprang -##forth -skinny -##mis -administrators -rotterdam -zambia -zhao -boiler -discoveries -##ride -##physics -lucius -disappointing -outreach -spoon -##frame -qualifications -unanimously -enjoys -regency -##iidae -stade -realism -veterinary -rodgers -dump -alain -chestnut -castile -censorship -rumble -gibbs -##itor -communion -reggae -inactivated -logs -loads -##houses -homosexual -##iano -ale -informs -##cas -phrases -plaster -linebacker -ambrose -kaiser -fascinated -850 -limerick -recruitment -forge -mastered -##nding -leinster -rooted -threaten -##strom -borneo -##hes -suggestions -scholarships -propeller -documentaries -patronage -coats -constructing -invest -neurons -comet -entirety -shouts -identities -annoying -unchanged -wary -##antly -##ogy -neat -oversight -##kos -phillies -replay -constance -##kka -incarnation -humble -skies -minus -##acy -smithsonian -##chel -guerrilla -jar -cadets -##plate -surplus -audit -##aru -cracking -joanna -louisa -pacing -##lights -intentionally -##iri -diner -nwa -imprint -australians -tong -unprecedented -bunker -naive -specialists -ark -nichols -railing -leaked -pedal -##uka -shrub -longing -roofs -v8 -captains -neural -tuned -##ntal -##jet -emission -medina -frantic -codex -definitive -sid -abolition -intensified -stocks -enrique -sustain -genoa -oxide -##written -clues -cha -##gers -tributaries -fragment -venom -##rity -##ente -##sca -muffled -vain -sire -laos -##ingly -##hana -hastily -snapping -surfaced -sentiment -motive -##oft -contests -approximate -mesa -luckily -dinosaur -exchanges -propelled -accord -bourne -relieve -tow -masks -offended -##ues -cynthia -##mmer -rains -bartender -zinc -reviewers -lois -##sai -legged -arrogant -rafe -rosie -comprise -handicap -blockade 
-inlet -lagoon -copied -drilling -shelley -petals -##inian -mandarin -obsolete -##inated -onward -arguably -productivity -cindy -praising -seldom -busch -discusses -raleigh -shortage -ranged -stanton -encouragement -firstly -conceded -overs -temporal -##uke -cbe -##bos -woo -certainty -pumps -##pton -stalked -##uli -lizzie -periodic -thieves -weaker -##night -gases -shoving -chooses -wc -##chemical -prompting -weights -##kill -robust -flanked -sticky -hu -tuberculosis -##eb -##eal -christchurch -resembled -wallet -reese -inappropriate -pictured -distract -fixing -fiddle -giggled -burger -heirs -hairy -mechanic -torque -apache -obsessed -chiefly -cheng -logging -##tag -extracted -meaningful -numb -##vsky -gloucestershire -reminding -##bay -unite -##lit -breeds -diminished -clown -glove -1860s -##ن -##ug -archibald -focal -freelance -sliced -depiction -##yk -organism -switches -sights -stray -crawling -##ril -lever -leningrad -interpretations -loops -anytime -reel -alicia -delighted -##ech -inhaled -xiv -suitcase -bernie -vega -licenses -northampton -exclusion -induction -monasteries -racecourse -homosexuality -##right -##sfield -##rky -dimitri -michele -alternatives -ions -commentators -genuinely -objected -pork -hospitality -fencing -stephan -warships -peripheral -wit -drunken -wrinkled -quentin -spends -departing -chung -numerical -spokesperson -##zone -johannesburg -caliber -killers -##udge -assumes -neatly -demographic -abigail -bloc -##vel -mounting -##lain -bentley -slightest -xu -recipients -##jk -merlin -##writer -seniors -prisons -blinking -hindwings -flickered -kappa -##hel -80s -strengthening -appealing -brewing -gypsy -mali -lashes -hulk -unpleasant -harassment -bio -treaties -predict -instrumentation -pulp -troupe -boiling -mantle -##ffe -ins -##vn -dividing -handles -verbs -##onal -coconut -senegal -340 -thorough -gum -momentarily -##sto -cocaine -panicked -destined -##turing -teatro -denying -weary -captained -mans -##hawks -##code -wakefield -bollywood -thankfully -##16 -cyril -##wu -amendments -##bahn -consultation -stud -reflections -kindness -1787 -internally -##ovo -tex -mosaic -distribute -paddy -seeming -143 -##hic -piers -##15 -##mura -##verse -popularly -winger -kang -sentinel -mccoy -##anza -covenant -##bag -verge -fireworks -suppress -thrilled -dominate -##jar -swansea -##60 -142 -reconciliation -##ndi -stiffened -cue -dorian -##uf -damascus -amor -ida -foremost -##aga -porsche -unseen -dir -##had -##azi -stony -lexi -melodies -##nko -angular -integer -podcast -ants -inherent -jaws -justify -persona -##olved -josephine -##nr -##ressed -customary -flashes -gala -cyrus -glaring -backyard -ariel -physiology -greenland -html -stir -avon -atletico -finch -methodology -ked -##lent -mas -catholicism -townsend -branding -quincy -fits -containers -1777 -ashore -aragon -##19 -forearm -poisoning -##sd -adopting -conquer -grinding -amnesty -keller -finances -evaluate -forged -lankan -instincts -##uto -guam -bosnian -photographed -workplace -desirable -protector -##dog -allocation -intently -encourages -willy -##sten -bodyguard -electro -brighter -##ν -bihar -##chev -lasts -opener -amphibious -sal -verde -arte -##cope -captivity -vocabulary -yields -##tted -agreeing -desmond -pioneered -##chus -strap -campaigned -railroads -##ович -emblem -##dre -stormed -501 -##ulous -marijuana -northumberland -##gn -##nath -bowen -landmarks -beaumont -##qua -danube -##bler -attorneys -th -ge -flyers -critique -villains -cass -mutation -acc -##0s -colombo -mckay -motif -sampling -concluding 
-syndicate -##rell -neon -stables -ds -warnings -clint -mourning -wilkinson -##tated -merrill -leopard -evenings -exhaled -emil -sonia -ezra -discrete -stove -farrell -fifteenth -prescribed -superhero -##rier -worms -helm -wren -##duction -##hc -expo -##rator -hq -unfamiliar -antony -prevents -acceleration -fiercely -mari -painfully -calculations -cheaper -ign -clifton -irvine -davenport -mozambique -##np -pierced -##evich -wonders -##wig -##cate -##iling -crusade -ware -##uel -enzymes -reasonably -mls -##coe -mater -ambition -bunny -eliot -kernel -##fin -asphalt -headmaster -torah -aden -lush -pins -waived -##care -##yas -joao -substrate -enforce -##grad -##ules -alvarez -selections -epidemic -tempted -##bit -bremen -translates -ensured -waterfront -29th -forrest -manny -malone -kramer -reigning -cookies -simpler -absorption -205 -engraved -##ffy -evaluated -1778 -haze -146 -comforting -crossover -##abe -thorn -##rift -##imo -##pop -suppression -fatigue -cutter -##tr -201 -wurttemberg -##orf -enforced -hovering -proprietary -gb -samurai -syllable -ascent -lacey -tick -lars -tractor -merchandise -rep -bouncing -defendants -##yre -huntington -##ground -##oko -standardized -##hor -##hima -assassinated -nu -predecessors -rainy -liar -assurance -lyrical -##uga -secondly -flattened -ios -parameter -undercover -##mity -bordeaux -punish -ridges -markers -exodus -inactive -hesitate -debbie -nyc -pledge -savoy -nagar -offset -organist -##tium -hesse -marin -converting -##iver -diagram -propulsion -pu -validity -reverted -supportive -##dc -ministries -clans -responds -proclamation -##inae -##ø -##rea -ein -pleading -patriot -sf -birch -islanders -strauss -hates -##dh -brandenburg -concession -rd -##ob -1900s -killings -textbook -antiquity -cinematography -wharf -embarrassing -setup -creed -farmland -inequality -centred -signatures -fallon -370 -##ingham -##uts -ceylon -gazing -directive -laurie -##tern -globally -##uated -##dent -allah -excavation -threads -##cross -148 -frantically -icc -utilize -determines -respiratory -thoughtful -receptions -##dicate -merging -chandra -seine -147 -builders -builds -diagnostic -dev -visibility -goddamn -analyses -dhaka -cho -proves -chancel -concurrent -curiously -canadians -pumped -restoring -1850s -turtles -jaguar -sinister -spinal -traction -declan -vows -1784 -glowed -capitalism -swirling -install -universidad -##lder -##oat -soloist -##genic -##oor -coincidence -beginnings -nissan -dip -resorts -caucasus -combustion -infectious -##eno -pigeon -serpent -##itating -conclude -masked -salad -jew -##gr -surreal -toni -##wc -harmonica -151 -##gins -##etic -##coat -fishermen -intending -bravery -##wave -klaus -titan -wembley -taiwanese -ransom -40th -incorrect -hussein -eyelids -jp -cooke -dramas -utilities -##etta -##print -eisenhower -principally -granada -lana -##rak -openings -concord -##bl -bethany -connie -morality -sega -##mons -##nard -earnings -##kara -##cine -wii -communes -##rel -coma -composing -softened -severed -grapes -##17 -nguyen -analyzed -warlord -hubbard -heavenly -behave -slovenian -##hit -##ony -hailed -filmmakers -trance -caldwell -skye -unrest -coward -likelihood -##aging -bern -sci -taliban -honolulu -propose -##wang -1700 -browser -imagining -cobra -contributes -dukes -instinctively -conan -violinist -##ores -accessories -gradual -##amp -quotes -sioux -##dating -undertake -intercepted -sparkling -compressed -139 -fungus -tombs -haley -imposing -rests -degradation -lincolnshire -retailers -wetlands -tulsa -distributor -dungeon -nun 
-greenhouse -convey -atlantis -aft -exits -oman -dresser -lyons -##sti -joking -eddy -judgement -omitted -digits -##cts -##game -juniors -##rae -cents -stricken -une -##ngo -wizards -weir -breton -nan -technician -fibers -liking -royalty -##cca -154 -persia -terribly -magician -##rable -##unt -vance -cafeteria -booker -camille -warmer -##static -consume -cavern -gaps -compass -contemporaries -foyer -soothing -graveyard -maj -plunged -blush -##wear -cascade -demonstrates -ordinance -##nov -boyle -##lana -rockefeller -shaken -banjo -izzy -##ense -breathless -vines -##32 -##eman -alterations -chromosome -dwellings -feudal -mole -153 -catalonia -relics -tenant -mandated -##fm -fridge -hats -honesty -patented -raul -heap -cruisers -accusing -enlightenment -infants -wherein -chatham -contractors -zen -affinity -hc -osborne -piston -156 -traps -maturity -##rana -lagos -##zal -peering -##nay -attendant -dealers -protocols -subset -prospects -biographical -##cre -artery -##zers -insignia -nuns -endured -##eration -recommend -schwartz -serbs -berger -cromwell -crossroads -##ctor -enduring -clasped -grounded -##bine -marseille -twitched -abel -choke -https -catalyst -moldova -italians -##tist -disastrous -wee -##oured -##nti -wwf -nope -##piration -##asa -expresses -thumbs -167 -##nza -coca -1781 -cheating -##ption -skipped -sensory -heidelberg -spies -satan -dangers -semifinal -202 -bohemia -whitish -confusing -shipbuilding -relies -surgeons -landings -ravi -baku -moor -suffix -alejandro -##yana -litre -upheld -##unk -rajasthan -##rek -coaster -insists -posture -scenarios -etienne -favoured -appoint -transgender -elephants -poked -greenwood -defences -fulfilled -militant -somali -1758 -chalk -potent -##ucci -migrants -wink -assistants -nos -restriction -activism -niger -##ario -colon -shaun -##sat -daphne -##erated -swam -congregations -reprise -considerations -magnet -playable -xvi -##р -overthrow -tobias -knob -chavez -coding -##mers -propped -katrina -orient -newcomer -##suke -temperate -##pool -farmhouse -interrogation -##vd -committing -##vert -forthcoming -strawberry -joaquin -macau -ponds -shocking -siberia -##cellular -chant -contributors -##nant -##ologists -sped -absorb -hail -1782 -spared -##hore -barbados -karate -opus -originates -saul -##xie -evergreen -leaped -##rock -correlation -exaggerated -weekday -unification -bump -tracing -brig -afb -pathways -utilizing -##ners -mod -mb -disturbance -kneeling -##stad -##guchi -100th -pune -##thy -decreasing -168 -manipulation -miriam -academia -ecosystem -occupational -rbi -##lem -rift -##14 -rotary -stacked -incorporation -awakening -generators -guerrero -racist -##omy -cyber -derivatives -culminated -allie -annals -panzer -sainte -wikipedia -pops -zu -austro -##vate -algerian -politely -nicholson -mornings -educate -tastes -thrill -dartmouth -##gating -db -##jee -regan -differing -concentrating -choreography -divinity -##media -pledged -alexandre -routing -gregor -madeline -##idal -apocalypse -##hora -gunfire -culminating -elves -fined -liang -lam -programmed -tar -guessing -transparency -gabrielle -##gna -cancellation -flexibility -##lining -accession -shea -stronghold -nets -specializes -##rgan -abused -hasan -sgt -ling -exceeding -##₄ -admiration -supermarket -##ark -photographers -specialised -tilt -resonance -hmm -perfume -380 -sami -threatens -garland -botany -guarding -boiled -greet -puppy -russo -supplier -wilmington -vibrant -vijay -##bius -paralympic -grumbled -paige -faa -licking -margins -hurricanes -##gong -fest -grenade -ripping 
-##uz -counseling -weigh -##sian -needles -wiltshire -edison -costly -##not -fulton -tramway -redesigned -staffordshire -cache -gasping -watkins -sleepy -candidacy -##group -monkeys -timeline -throbbing -##bid -##sos -berth -uzbekistan -vanderbilt -bothering -overturned -ballots -gem -##iger -sunglasses -subscribers -hooker -compelling -ang -exceptionally -saloon -stab -##rdi -carla -terrifying -rom -##vision -coil -##oids -satisfying -vendors -31st -mackay -deities -overlooked -ambient -bahamas -felipe -olympia -whirled -botanist -advertised -tugging -##dden -disciples -morales -unionist -rites -foley -morse -motives -creepy -##₀ -soo -##sz -bargain -highness -frightening -turnpike -tory -reorganization -##cer -depict -biographer -##walk -unopposed -manifesto -##gles -institut -emile -accidental -kapoor -##dam -kilkenny -cortex -lively -##13 -romanesque -jain -shan -cannons -##ood -##ske -petrol -echoing -amalgamated -disappears -cautious -proposes -sanctions -trenton -##ر -flotilla -aus -contempt -tor -canary -cote -theirs -##hun -conceptual -deleted -fascinating -paso -blazing -elf -honourable -hutchinson -##eiro -##outh -##zin -surveyor -tee -amidst -wooded -reissue -intro -##ono -cobb -shelters -newsletter -hanson -brace -encoding -confiscated -dem -caravan -marino -scroll -melodic -cows -imam -##adi -##aneous -northward -searches -biodiversity -cora -310 -roaring -##bers -connell -theologian -halo -compose -pathetic -unmarried -dynamo -##oot -az -calculation -toulouse -deserves -humour -nr -forgiveness -tam -undergone -martyr -pamela -myths -whore -counselor -hicks -290 -heavens -battleship -electromagnetic -##bbs -stellar -establishments -presley -hopped -##chin -temptation -90s -wills -nas -##yuan -nhs -##nya -seminars -##yev -adaptations -gong -asher -lex -indicator -sikh -tobago -cites -goin -##yte -satirical -##gies -characterised -correspond -bubbles -lure -participates -##vid -eruption -skate -therapeutic -1785 -canals -wholesale -defaulted -sac -460 -petit -##zzled -virgil -leak -ravens -256 -portraying -##yx -ghetto -creators -dams -portray -vicente -##rington -fae -namesake -bounty -##arium -joachim -##ota -##iser -aforementioned -axle -snout -depended -dismantled -reuben -480 -##ibly -gallagher -##lau -##pd -earnest -##ieu -##iary -inflicted -objections -##llar -asa -gritted -##athy -jericho -##sea -##was -flick -underside -ceramics -undead -substituted -195 -eastward -undoubtedly -wheeled -chimney -##iche -guinness -cb -##ager -siding -##bell -traitor -baptiste -disguised -inauguration -149 -tipperary -choreographer -perched -warmed -stationary -eco -##ike -##ntes -bacterial -##aurus -flores -phosphate -##core -attacker -invaders -alvin -intersects -a1 -indirectly -immigrated -businessmen -cornelius -valves -narrated -pill -sober -ul -nationale -monastic -applicants -scenery -##jack -161 -motifs -constitutes -cpu -##osh -jurisdictions -sd -tuning -irritation -woven -##uddin -fertility -gao -##erie -antagonist -impatient -glacial -hides -boarded -denominations -interception -##jas -cookie -nicola -##tee -algebraic -marquess -bahn -parole -buyers -bait -turbines -paperwork -bestowed -natasha -renee -oceans -purchases -157 -vaccine -215 -##tock -fixtures -playhouse -integrate -jai -oswald -intellectuals -##cky -booked -nests -mortimer -##isi -obsession -sept -##gler -##sum -440 -scrutiny -simultaneous -squinted -##shin -collects -oven -shankar -penned -remarkably -##я -slips -luggage -spectral -1786 -collaborations -louie -consolidation -##ailed -##ivating -420 -hoover 
[... diff hunk truncated: several thousand removed lines of WordPiece tokenizer vocabulary entries (whole-word tokens and ##-prefixed subword pieces), apparently the contents of a bundled vocab.txt-style model file deleted in this change ...]
-recital -##bham -##ference -##omics -m2 -##bm -trois -##tropical -##в -commemorates -##meric -marge -##raction -1643 -670 -cosmetic -ravaged -##ige -catastrophe -eng -##shida -albrecht -arterial -bellamy -decor -harmon -##rde -bulbs -synchronized -vito -easiest -shetland -shielding -wnba -##glers -##ssar -##riam -brianna -cumbria -##aceous -##rard -cores -thayer -##nsk -brood -hilltop -luminous -carts -keynote -larkin -logos -##cta -##ا -##mund -##quay -lilith -tinted -277 -wrestle -mobilization -##uses -sequential -siam -bloomfield -takahashi -274 -##ieving -presenters -ringo -blazed -witty -##oven -##ignant -devastation -haydn -harmed -newt -therese -##peed -gershwin -molina -rabbis -sudanese -001 -innate -restarted -##sack -##fus -slices -wb -##shah -enroll -hypothetical -hysterical -1743 -fabio -indefinite -warped -##hg -exchanging -525 -unsuitable -##sboro -gallo -1603 -bret -cobalt -homemade -##hunter -mx -operatives -##dhar -terraces -durable -latch -pens -whorls -##ctuated -##eaux -billing -ligament -succumbed -##gly -regulators -spawn -##brick -##stead -filmfare -rochelle -##nzo -1725 -circumstance -saber -supplements -##nsky -##tson -crowe -wellesley -carrot -##9th -##movable -primate -drury -sincerely -topical -##mad -##rao -callahan -kyiv -smarter -tits -undo -##yeh -announcements -anthologies -barrio -nebula -##islaus -##shaft -##tyn -bodyguards -2021 -assassinate -barns -emmett -scully -##mah -##yd -##eland -##tino -##itarian -demoted -gorman -lashed -prized -adventist -writ -##gui -alla -invertebrates -##ausen -1641 -amman -1742 -align -healy -redistribution -##gf -##rize -insulation -##drop -adherents -hezbollah -vitro -ferns -yanking -269 -php -registering -uppsala -cheerleading -confines -mischievous -tully -##ross -49th -docked -roam -stipulated -pumpkin -##bry -prompt -##ezer -blindly -shuddering -craftsmen -frail -scented -katharine -scramble -shaggy -sponge -helix -zaragoza -279 -##52 -43rd -backlash -fontaine -seizures -posse -cowan -nonfiction -telenovela -wwii -hammered -undone -##gpur -encircled -irs -##ivation -artefacts -oneself -searing -smallpox -##belle -##osaurus -shandong -breached -upland -blushing -rankin -infinitely -psyche -tolerated -docking -evicted -##col -unmarked -##lving -gnome -lettering -litres -musique -##oint -benevolent -##jal -blackened -##anna -mccall -racers -tingle -##ocene -##orestation -introductions -radically -292 -##hiff -##باد -1610 -1739 -munchen -plead -##nka -condo -scissors -##sight -##tens -apprehension -##cey -##yin -hallmark -watering -formulas -sequels -##llas -aggravated -bae -commencing -##building -enfield -prohibits -marne -vedic -civilized -euclidean -jagger -beforehand -blasts -dumont -##arney -##nem -740 -conversions -hierarchical -rios -simulator -##dya -##lellan -hedges -oleg -thrusts -shadowed -darby -maximize -1744 -gregorian -##nded -##routed -sham -unspecified -##hog -emory -factual -##smo -##tp -fooled -##rger -ortega -wellness -marlon -##oton -##urance -casket -keating -ley -enclave -##ayan -char -influencing -jia -##chenko -412 -ammonia -erebidae -incompatible -violins -cornered -##arat -grooves -astronauts -columbian -rampant -fabrication -kyushu -mahmud -vanish -##dern -mesopotamia -##lete -ict -##rgen -caspian -kenji -pitted -##vered -999 -grimace -roanoke -tchaikovsky -twinned -##analysis -##awan -xinjiang -arias -clemson -kazakh -sizable -1662 -##khand -##vard -plunge -tatum -vittorio -##nden -cholera -##dana -##oper -bracing -indifference -projectile -superliga -##chee -realises -upgrading -299 -porte 
-retribution -##vies -nk -stil -##resses -ama -bureaucracy -blackberry -bosch -testosterone -collapses -greer -##pathic -ioc -fifties -malls -##erved -bao -baskets -adolescents -siegfried -##osity -##tosis -mantra -detecting -existent -fledgling -##cchi -dissatisfied -gan -telecommunication -mingled -sobbed -6000 -controversies -outdated -taxis -##raus -fright -slams -##lham -##fect -##tten -detectors -fetal -tanned -##uw -fray -goth -olympian -skipping -mandates -scratches -sheng -unspoken -hyundai -tracey -hotspur -restrictive -##buch -americana -mundo -##bari -burroughs -diva -vulcan -##6th -distinctions -thumping -##ngen -mikey -sheds -fide -rescues -springsteen -vested -valuation -##ece -##ely -pinnacle -rake -sylvie -##edo -almond -quivering -##irus -alteration -faltered -##wad -51st -hydra -ticked -##kato -recommends -##dicated -antigua -arjun -stagecoach -wilfred -trickle -pronouns -##pon -aryan -nighttime -##anian -gall -pea -stitch -##hei -leung -milos -##dini -eritrea -nexus -starved -snowfall -kant -parasitic -cot -discus -hana -strikers -appleton -kitchens -##erina -##partisan -##itha -##vius -disclose -metis -##channel -1701 -tesla -##vera -fitch -1735 -blooded -##tila -decimal -##tang -##bai -cyclones -eun -bottled -peas -pensacola -basha -bolivian -crabs -boil -lanterns -partridge -roofed -1645 -necks -##phila -opined -patting -##kla -##lland -chuckles -volta -whereupon -##nche -devout -euroleague -suicidal -##dee -inherently -involuntary -knitting -nasser -##hide -puppets -colourful -courageous -southend -stills -miraculous -hodgson -richer -rochdale -ethernet -greta -uniting -prism -umm -##haya -##itical -##utation -deterioration -pointe -prowess -##ropriation -lids -scranton -billings -subcontinent -##koff -##scope -brute -kellogg -psalms -degraded -##vez -stanisław -##ructured -ferreira -pun -astonishing -gunnar -##yat -arya -prc -gottfried -##tight -excursion -##ographer -dina -##quil -##nare -huffington -illustrious -wilbur -gundam -verandah -##zard -naacp -##odle -constructive -fjord -kade -##naud -generosity -thrilling -baseline -cayman -frankish -plastics -accommodations -zoological -##fting -cedric -qb -motorized -##dome -##otted -squealed -tackled -canucks -budgets -situ -asthma -dail -gabled -grasslands -whimpered -writhing -judgments -##65 -minnie -pv -##carbon -bananas -grille -domes -monique -odin -maguire -markham -tierney -##estra -##chua -libel -poke -speedy -atrium -laval -notwithstanding -##edly -fai -kala -##sur -robb -##sma -listings -luz -supplementary -tianjin -##acing -enzo -jd -ric -scanner -croats -transcribed -##49 -arden -cv -##hair -##raphy -##lver -##uy -357 -seventies -staggering -alam -horticultural -hs -regression -timbers -blasting -##ounded -montagu -manipulating -##cit -catalytic -1550 -troopers -##meo -condemnation -fitzpatrick -##oire -##roved -inexperienced -1670 -castes -##lative -outing -314 -dubois -flicking -quarrel -ste -learners -1625 -iq -whistled -##class -282 -classify -tariffs -temperament -355 -folly -liszt -##yles -immersed -jordanian -ceasefire -apparel -extras -maru -fished -##bio -harta -stockport -assortment -craftsman -paralysis -transmitters -##cola -blindness -##wk -fatally -proficiency -solemnly -##orno -repairing -amore -groceries -ultraviolet -##chase -schoolhouse -##tua -resurgence -nailed -##otype -##× -ruse -saliva -diagrams -##tructing -albans -rann -thirties -1b -antennas -hilarious -cougars -paddington -stats -##eger -breakaway -ipod -reza -authorship -prohibiting -scoffed -##etz -##ttle -conscription 
-defected -trondheim -##fires -ivanov -keenan -##adan -##ciful -##fb -##slow -locating -##ials -##tford -cadiz -basalt -blankly -interned -rags -rattling -##tick -carpathian -reassured -sync -bum -guildford -iss -staunch -##onga -astronomers -sera -sofie -emergencies -susquehanna -##heard -duc -mastery -vh1 -williamsburg -bayer -buckled -craving -##khan -##rdes -bloomington -##write -alton -barbecue -##bians -justine -##hri -##ndt -delightful -smartphone -newtown -photon -retrieval -peugeot -hissing -##monium -##orough -flavors -lighted -relaunched -tainted -##games -##lysis -anarchy -microscopic -hopping -adept -evade -evie -##beau -inhibit -sinn -adjustable -hurst -intuition -wilton -cisco -44th -lawful -lowlands -stockings -thierry -##dalen -##hila -##nai -fates -prank -tb -maison -lobbied -provocative -1724 -4a -utopia -##qual -carbonate -gujarati -purcell -##rford -curtiss -##mei -overgrown -arenas -mediation -swallows -##rnik -respectful -turnbull -##hedron -##hope -alyssa -ozone -##ʻi -ami -gestapo -johansson -snooker -canteen -cuff -declines -empathy -stigma -##ags -##iner -##raine -taxpayers -gui -volga -##wright -##copic -lifespan -overcame -tattooed -enactment -giggles -##ador -##camp -barrington -bribe -obligatory -orbiting -peng -##enas -elusive -sucker -##vating -cong -hardship -empowered -anticipating -estrada -cryptic -greasy -detainees -planck -sudbury -plaid -dod -marriott -kayla -##ears -##vb -##zd -mortally -##hein -cognition -radha -319 -liechtenstein -meade -richly -argyle -harpsichord -liberalism -trumpets -lauded -tyrant -salsa -tiled -lear -promoters -reused -slicing -trident -##chuk -##gami -##lka -cantor -checkpoint -##points -gaul -leger -mammalian -##tov -##aar -##schaft -doha -frenchman -nirvana -##vino -delgado -headlining -##eron -##iography -jug -tko -1649 -naga -intersections -##jia -benfica -nawab -##suka -ashford -gulp -##deck -##vill -##rug -brentford -frazier -pleasures -dunne -potsdam -shenzhen -dentistry -##tec -flanagan -##dorff -##hear -chorale -dinah -prem -quezon -##rogated -relinquished -sutra -terri -##pani -flaps -##rissa -poly -##rnet -homme -aback -##eki -linger -womb -##kson -##lewood -doorstep -orthodoxy -threaded -westfield -##rval -dioceses -fridays -subsided -##gata -loyalists -##biotic -##ettes -letterman -lunatic -prelate -tenderly -invariably -souza -thug -winslow -##otide -furlongs -gogh -jeopardy -##runa -pegasus -##umble -humiliated -standalone -tagged -##roller -freshmen -klan -##bright -attaining -initiating -transatlantic -logged -viz -##uance -1723 -combatants -intervening -stephane -chieftain -despised -grazed -317 -cdc -galveston -godzilla -macro -simulate -##planes -parades -##esses -960 -##ductive -##unes -equator -overdose -##cans -##hosh -##lifting -joshi -epstein -sonora -treacherous -aquatics -manchu -responsive -##sation -supervisory -##christ -##llins -##ibar -##balance -##uso -kimball -karlsruhe -mab -##emy -ignores -phonetic -reuters -spaghetti -820 -almighty -danzig -rumbling -tombstone -designations -lured -outset -##felt -supermarkets -##wt -grupo -kei -kraft -susanna -##blood -comprehension -genealogy -##aghan -##verted -redding -##ythe -1722 -bowing -##pore -##roi -lest -sharpened -fulbright -valkyrie -sikhs -##unds -swans -bouquet -merritt -##tage -##venting -commuted -redhead -clerks -leasing -cesare -dea -hazy -##vances -fledged -greenfield -servicemen -##gical -armando -blackout -dt -sagged -downloadable -intra -potion -pods -##4th -##mism -xp -attendants -gambia -stale -##ntine -plump -asteroids 
-rediscovered -buds -flea -hive -##neas -1737 -classifications -debuts -##eles -olympus -scala -##eurs -##gno -##mute -hummed -sigismund -visuals -wiggled -await -pilasters -clench -sulfate -##ances -bellevue -enigma -trainee -snort -##sw -clouded -denim -##rank -##rder -churning -hartman -lodges -riches -sima -##missible -accountable -socrates -regulates -mueller -##cr -1702 -avoids -solids -himalayas -nutrient -pup -##jevic -squat -fades -nec -##lates -##pina -##rona -##ου -privateer -tequila -##gative -##mpton -apt -hornet -immortals -##dou -asturias -cleansing -dario -##rries -##anta -etymology -servicing -zhejiang -##venor -##nx -horned -erasmus -rayon -relocating -£10 -##bags -escalated -promenade -stubble -2010s -artisans -axial -liquids -mora -sho -yoo -##tsky -bundles -oldies -##nally -notification -bastion -##ths -sparkle -##lved -1728 -leash -pathogen -highs -##hmi -immature -880 -gonzaga -ignatius -mansions -monterrey -sweets -bryson -##loe -polled -regatta -brightest -pei -rosy -squid -hatfield -payroll -addict -meath -cornerback -heaviest -lodging -##mage -capcom -rippled -##sily -barnet -mayhem -ymca -snuggled -rousseau -##cute -blanchard -284 -fragmented -leighton -chromosomes -risking -##md -##strel -##utter -corinne -coyotes -cynical -hiroshi -yeomanry -##ractive -ebook -grading -mandela -plume -agustin -magdalene -##rkin -bea -femme -trafford -##coll -##lun -##tance -52nd -fourier -upton -##mental -camilla -gust -iihf -islamabad -longevity -##kala -feldman -netting -##rization -endeavour -foraging -mfa -orr -##open -greyish -contradiction -graz -##ruff -handicapped -marlene -tweed -oaxaca -spp -campos -miocene -pri -configured -cooks -pluto -cozy -pornographic -##entes -70th -fairness -glided -jonny -lynne -rounding -sired -##emon -##nist -remade -uncover -##mack -complied -lei -newsweek -##jured -##parts -##enting -##pg -293 -finer -guerrillas -athenian -deng -disused -stepmother -accuse -gingerly -seduction -521 -confronting -##walker -##going -gora -nostalgia -sabres -virginity -wrenched -##minated -syndication -wielding -eyre -##56 -##gnon -##igny -behaved -taxpayer -sweeps -##growth -childless -gallant -##ywood -amplified -geraldine -scrape -##ffi -babylonian -fresco -##rdan -##kney -##position -1718 -restricting -tack -fukuoka -osborn -selector -partnering -##dlow -318 -gnu -kia -tak -whitley -gables -##54 -##mania -mri -softness -immersion -##bots -##evsky -1713 -chilling -insignificant -pcs -##uis -elites -lina -purported -supplemental -teaming -##americana -##dding -##inton -proficient -rouen -##nage -##rret -niccolo -selects -##bread -fluffy -1621 -gruff -knotted -mukherjee -polgara -thrash -nicholls -secluded -smoothing -thru -corsica -loaf -whitaker -inquiries -##rrier -##kam -indochina -289 -marlins -myles -peking -##tea -extracts -pastry -superhuman -connacht -vogel -##ditional -##het -##udged -##lash -gloss -quarries -refit -teaser -##alic -##gaon -20s -materialized -sling -camped -pickering -tung -tracker -pursuant -##cide -cranes -soc -##cini -##typical -##viere -anhalt -overboard -workout -chores -fares -orphaned -stains -##logie -fenton -surpassing -joyah -triggers -##itte -grandmaster -##lass -##lists -clapping -fraudulent -ledger -nagasaki -##cor -##nosis -##tsa -eucalyptus -tun -##icio -##rney -##tara -dax -heroism -ina -wrexham -onboard -unsigned -##dates -moshe -galley -winnie -droplets -exiles -praises -watered -noodles -##aia -fein -adi -leland -multicultural -stink -bingo -comets -erskine -modernized -canned -constraint -domestically 
-chemotherapy -featherweight -stifled -##mum -darkly -irresistible -refreshing -hasty -isolate -##oys -kitchener -planners -##wehr -cages -yarn -implant -toulon -elects -childbirth -yue -##lind -##lone -cn -rightful -sportsman -junctions -remodeled -specifies -##rgh -291 -##oons -complimented -##urgent -lister -ot -##logic -bequeathed -cheekbones -fontana -gabby -##dial -amadeus -corrugated -maverick -resented -triangles -##hered -##usly -nazareth -tyrol -1675 -assent -poorer -sectional -aegean -##cous -296 -nylon -ghanaian -##egorical -##weig -cushions -forbid -fusiliers -obstruction -somerville -##scia -dime -earrings -elliptical -leyte -oder -polymers -timmy -atm -midtown -piloted -settles -continual -externally -mayfield -##uh -enrichment -henson -keane -persians -1733 -benji -braden -pep -324 -##efe -contenders -pepsi -valet -##isches -298 -##asse -##earing -goofy -stroll -##amen -authoritarian -occurrences -adversary -ahmedabad -tangent -toppled -dorchester -1672 -modernism -marxism -islamist -charlemagne -exponential -racks -unicode -brunette -mbc -pic -skirmish -##bund -##lad -##powered -##yst -hoisted -messina -shatter -##ctum -jedi -vantage -##music -##neil -clemens -mahmoud -corrupted -authentication -lowry -nils -##washed -omnibus -wounding -jillian -##itors -##opped -serialized -narcotics -handheld -##arm -##plicity -intersecting -stimulating -##onis -crate -fellowships -hemingway -casinos -climatic -fordham -copeland -drip -beatty -leaflets -robber -brothel -madeira -##hedral -sphinx -ultrasound -##vana -valor -forbade -leonid -villas -##aldo -duane -marquez -##cytes -disadvantaged -forearms -kawasaki -reacts -consular -lax -uncles -uphold -##hopper -concepcion -dorsey -lass -##izan -arching -passageway -1708 -researches -tia -internationals -##graphs -##opers -distinguishes -javanese -divert -##uven -plotted -##listic -##rwin -##erik -##tify -affirmative -signifies -validation -##bson -kari -felicity -georgina -zulu -##eros -##rained -##rath -overcoming -##dot -argyll -##rbin -1734 -chiba -ratification -windy -earls -parapet -##marks -hunan -pristine -astrid -punta -##gart -brodie -##kota -##oder -malaga -minerva -rouse -##phonic -bellowed -pagoda -portals -reclamation -##gur -##odies -##⁄₄ -parentheses -quoting -allergic -palette -showcases -benefactor -heartland -nonlinear -##tness -bladed -cheerfully -scans -##ety -##hone -1666 -girlfriends -pedersen -hiram -sous -##liche -##nator -1683 -##nery -##orio -##umen -bobo -primaries -smiley -##cb -unearthed -uniformly -fis -metadata -1635 -ind -##oted -recoil -##titles -##tura -##ια -406 -hilbert -jamestown -mcmillan -tulane -seychelles -##frid -antics -coli -fated -stucco -##grants -1654 -bulky -accolades -arrays -caledonian -carnage -optimism -puebla -##tative -##cave -enforcing -rotherham -seo -dunlop -aeronautics -chimed -incline -zoning -archduke -hellenistic -##oses -##sions -candi -thong -##ople -magnate -rustic -##rsk -projective -slant -##offs -danes -hollis -vocalists -##ammed -congenital -contend -gesellschaft -##ocating -##pressive -douglass -quieter -##cm -##kshi -howled -salim -spontaneously -townsville -buena -southport -##bold -kato -1638 -faerie -stiffly -##vus -##rled -297 -flawless -realising -taboo -##7th -bytes -straightening -356 -jena -##hid -##rmin -cartwright -berber -bertram -soloists -411 -noses -417 -coping -fission -hardin -inca -##cen -1717 -mobilized -vhf -##raf -biscuits -curate -##85 -##anial -331 -gaunt -neighbourhoods -1540 -##abas -blanca -bypassed -sockets -behold -coincidentally -##bane 
-nara -shave -splinter -terrific -##arion -##erian -commonplace -juris -redwood -waistband -boxed -caitlin -fingerprints -jennie -naturalized -##ired -balfour -craters -jody -bungalow -hugely -quilt -glitter -pigeons -undertaker -bulging -constrained -goo -##sil -##akh -assimilation -reworked -##person -persuasion -##pants -felicia -##cliff -##ulent -1732 -explodes -##dun -##inium -##zic -lyman -vulture -hog -overlook -begs -northwards -ow -spoil -##urer -fatima -favorably -accumulate -sargent -sorority -corresponded -dispersal -kochi -toned -##imi -##lita -internacional -newfound -##agger -##lynn -##rigue -booths -peanuts -##eborg -medicare -muriel -nur -##uram -crates -millennia -pajamas -worsened -##breakers -jimi -vanuatu -yawned -##udeau -carousel -##hony -hurdle -##ccus -##mounted -##pod -rv -##eche -airship -ambiguity -compulsion -recapture -##claiming -arthritis -##osomal -1667 -asserting -ngc -sniffing -dade -discontent -glendale -ported -##amina -defamation -rammed -##scent -fling -livingstone -##fleet -875 -##ppy -apocalyptic -comrade -lcd -##lowe -cessna -eine -persecuted -subsistence -demi -hoop -reliefs -710 -coptic -progressing -stemmed -perpetrators -1665 -priestess -##nio -dobson -ebony -rooster -itf -tortricidae -##bbon -##jian -cleanup -##jean -##øy -1721 -eighties -taxonomic -holiness -##hearted -##spar -antilles -showcasing -stabilized -##nb -gia -mascara -michelangelo -dawned -##uria -##vinsky -extinguished -fitz -grotesque -£100 -##fera -##loid -##mous -barges -neue -throbbed -cipher -johnnie -##a1 -##mpt -outburst -##swick -spearheaded -administrations -c1 -heartbreak -pixels -pleasantly -##enay -lombardy -plush -##nsed -bobbie -##hly -reapers -tremor -xiang -minogue -substantive -hitch -barak -##wyl -kwan -##encia -910 -obscene -elegance -indus -surfer -bribery -conserve -##hyllum -##masters -horatio -##fat -apes -rebound -psychotic -##pour -iteration -##mium -##vani -botanic -horribly -antiques -dispose -paxton -##hli -##wg -timeless -1704 -disregard -engraver -hounds -##bau -##version -looted -uno -facilitates -groans -masjid -rutland -antibody -disqualification -decatur -footballers -quake -slacks -48th -rein -scribe -stabilize -commits -exemplary -tho -##hort -##chison -pantry -traversed -##hiti -disrepair -identifiable -vibrated -baccalaureate -##nnis -csa -interviewing -##iensis -##raße -greaves -wealthiest -343 -classed -jogged -£5 -##58 -##atal -illuminating -knicks -respecting -##uno -scrubbed -##iji -##dles -kruger -moods -growls -raider -silvia -chefs -kam -vr -cree -percival -##terol -gunter -counterattack -defiant -henan -ze -##rasia -##riety -equivalence -submissions -##fra -##thor -bautista -mechanically -##heater -cornice -herbal -templar -##mering -outputs -ruining -ligand -renumbered -extravagant -mika -blockbuster -eta -insurrection -##ilia -darkening -ferocious -pianos -strife -kinship -##aer -melee -##anor -##iste -##may -##oue -decidedly -weep -##jad -##missive -##ppel -354 -puget -unease -##gnant -1629 -hammering -kassel -ob -wessex -##lga -bromwich -egan -paranoia -utilization -##atable -##idad -contradictory -provoke -##ols -##ouring -##tangled -knesset -##very -##lette -plumbing -##sden -##¹ -greensboro -occult -sniff -338 -zev -beaming -gamer -haggard -mahal -##olt -##pins -mendes -utmost -briefing -gunnery -##gut -##pher -##zh -##rok -1679 -khalifa -sonya -##boot -principals -urbana -wiring -##liffe -##minating -##rrado -dahl -nyu -skepticism -np -townspeople -ithaca -lobster -somethin -##fur -##arina -##−1 -freighter -zimmerman -biceps 
-contractual -##herton -amend -hurrying -subconscious -##anal -336 -meng -clermont -spawning -##eia -##lub -dignitaries -impetus -snacks -spotting -twigs -##bilis -##cz -##ouk -libertadores -nic -skylar -##aina -##firm -gustave -asean -##anum -dieter -legislatures -flirt -bromley -trolls -umar -##bbies -##tyle -blah -parc -bridgeport -crank -negligence -##nction -46th -constantin -molded -bandages -seriousness -00pm -siegel -carpets -compartments -upbeat -statehood -##dner -##edging -marko -730 -platt -##hane -paving -##iy -1738 -abbess -impatience -limousine -nbl -##talk -441 -lucille -mojo -nightfall -robbers -##nais -karel -brisk -calves -replicate -ascribed -telescopes -##olf -intimidated -##reen -ballast -specialization -##sit -aerodynamic -caliphate -rainer -visionary -##arded -epsilon -##aday -##onte -aggregation -auditory -boosted -reunification -kathmandu -loco -robyn -402 -acknowledges -appointing -humanoid -newell -redeveloped -restraints -##tained -barbarians -chopper -1609 -italiana -##lez -##lho -investigates -wrestlemania -##anies -##bib -690 -##falls -creaked -dragoons -gravely -minions -stupidity -volley -##harat -##week -musik -##eries -##uously -fungal -massimo -semantics -malvern -##ahl -##pee -discourage -embryo -imperialism -1910s -profoundly -##ddled -jiangsu -sparkled -stat -##holz -sweatshirt -tobin -##iction -sneered -##cheon -##oit -brit -causal -smyth -##neuve -diffuse -perrin -silvio -##ipes -##recht -detonated -iqbal -selma -##nism -##zumi -roasted -##riders -tay -##ados -##mament -##mut -##rud -840 -completes -nipples -cfa -flavour -hirsch -##laus -calderon -sneakers -moravian -##ksha -1622 -rq -294 -##imeters -bodo -##isance -##pre -##ronia -anatomical -excerpt -##lke -dh -kunst -##tablished -##scoe -biomass -panted -unharmed -gael -housemates -montpellier -##59 -coa -rodents -tonic -hickory -singleton -##taro -451 -1719 -aldo -breaststroke -dempsey -och -rocco -##cuit -merton -dissemination -midsummer -serials -##idi -haji -polynomials -##rdon -gs -enoch -prematurely -shutter -taunton -£3 -##grating -##inates -archangel -harassed -##asco -326 -archway -dazzling -##ecin -1736 -sumo -wat -##kovich -1086 -honneur -##ently -##nostic -##ttal -##idon -1605 -403 -1716 -blogger -rents -##gnan -hires -##ikh -##dant -howie -##rons -handler -retracted -shocks -1632 -arun -duluth -kepler -trumpeter -##lary -peeking -seasoned -trooper -##mara -laszlo -##iciencies -##rti -heterosexual -##inatory -##ssion -indira -jogging -##inga -##lism -beit -dissatisfaction -malice -##ately -nedra -peeling -##rgeon -47th -stadiums -475 -vertigo -##ains -iced -restroom -##plify -##tub -illustrating -pear -##chner -##sibility -inorganic -rappers -receipts -watery -##kura -lucinda -##oulos -reintroduced -##8th -##tched -gracefully -saxons -nutritional -wastewater -rained -favourites -bedrock -fisted -hallways -likeness -upscale -##lateral -1580 -blinds -prequel -##pps -##tama -deter -humiliating -restraining -tn -vents -1659 -laundering -recess -rosary -tractors -coulter -federer -##ifiers -##plin -persistence -##quitable -geschichte -pendulum -quakers -##beam -bassett -pictorial -buffet -koln -##sitor -drills -reciprocal -shooters -##57 -##cton -##tees -converge -pip -dmitri -donnelly -yamamoto -aqua -azores -demographics -hypnotic -spitfire -suspend -wryly -roderick -##rran -sebastien -##asurable -mavericks -##fles -##200 -himalayan -prodigy -##iance -transvaal -demonstrators -handcuffs -dodged -mcnamara -sublime -1726 -crazed -##efined -##till -ivo -pondered -reconciled -shrill -sava 
-##duk -bal -cad -heresy -jaipur -goran -##nished -341 -lux -shelly -whitehall -##hre -israelis -peacekeeping -##wled -1703 -demetrius -ousted -##arians -##zos -beale -anwar -backstroke -raged -shrinking -cremated -##yck -benign -towing -wadi -darmstadt -landfill -parana -soothe -colleen -sidewalks -mayfair -tumble -hepatitis -ferrer -superstructure -##gingly -##urse -##wee -anthropological -translators -##mies -closeness -hooves -##pw -mondays -##roll -##vita -landscaping -##urized -purification -sock -thorns -thwarted -jalan -tiberius -##taka -saline -##rito -confidently -khyber -sculptors -##ij -brahms -hammersmith -inspectors -battista -fivb -fragmentation -hackney -##uls -arresting -exercising -antoinette -bedfordshire -##zily -dyed -##hema -1656 -racetrack -variability -##tique -1655 -austrians -deteriorating -madman -theorists -aix -lehman -weathered -1731 -decreed -eruptions -1729 -flaw -quinlan -sorbonne -flutes -nunez -1711 -adored -downwards -fable -rasped -1712 -moritz -mouthful -renegade -shivers -stunts -dysfunction -restrain -translit -327 -pancakes -##avio -##cision -##tray -351 -vial -##lden -bain -##maid -##oxide -chihuahua -malacca -vimes -##rba -##rnier -1664 -donnie -plaques -##ually -337 -bangs -floppy -huntsville -loretta -nikolay -##otte -eater -handgun -ubiquitous -##hett -eras -zodiac -1634 -##omorphic -1820s -##zog -cochran -##bula -##lithic -warring -##rada -dalai -excused -blazers -mcconnell -reeling -bot -este -##abi -geese -hoax -taxon -##bla -guitarists -##icon -condemning -hunts -inversion -moffat -taekwondo -##lvis -1624 -stammered -##rest -##rzy -sousa -fundraiser -marylebone -navigable -uptown -cabbage -daniela -salman -shitty -whimper -##kian -##utive -programmers -protections -rm -##rmi -##rued -forceful -##enes -fuss -##tao -##wash -brat -oppressive -reykjavik -spartak -ticking -##inkles -##kiewicz -adolph -horst -maui -protege -straighten -cpc -landau -concourse -clements -resultant -##ando -imaginative -joo -reactivated -##rem -##ffled -##uising -consultative -##guide -flop -kaitlyn -mergers -parenting -somber -##vron -supervise -vidhan -##imum -courtship -exemplified -harmonies -medallist -refining -##rrow -##ка -amara -##hum -780 -goalscorer -sited -overshadowed -rohan -displeasure -secretive -multiplied -osman -##orth -engravings -padre -##kali -##veda -miniatures -mis -##yala -clap -pali -rook -##cana -1692 -57th -antennae -astro -oskar -1628 -bulldog -crotch -hackett -yucatan -##sure -amplifiers -brno -ferrara -migrating -##gree -thanking -turing -##eza -mccann -ting -andersson -onslaught -gaines -ganga -incense -standardization -##mation -sentai -scuba -stuffing -turquoise -waivers -alloys -##vitt -regaining -vaults -##clops -##gizing -digger -furry -memorabilia -probing -##iad -payton -rec -deutschland -filippo -opaque -seamen -zenith -afrikaans -##filtration -disciplined -inspirational -##merie -banco -confuse -grafton -tod -##dgets -championed -simi -anomaly -biplane -##ceptive -electrode -##para -1697 -cleavage -crossbow -swirl -informant -##lars -##osta -afi -bonfire -spec -##oux -lakeside -slump -##culus -##lais -##qvist -##rrigan -1016 -facades -borg -inwardly -cervical -xl -pointedly -050 -stabilization -##odon -chests -1699 -hacked -ctv -orthogonal -suzy -##lastic -gaulle -jacobite -rearview -##cam -##erted -ashby -##drik -##igate -##mise -##zbek -affectionately -canine -disperse -latham -##istles -##ivar -spielberg -##orin -##idium -ezekiel -cid -##sg -durga -middletown -##cina -customized -frontiers -harden -##etano -##zzy -1604 
-bolsheviks -##66 -coloration -yoko -##bedo -briefs -slabs -debra -liquidation -plumage -##oin -blossoms -dementia -subsidy -1611 -proctor -relational -jerseys -parochial -ter -##ici -esa -peshawar -cavalier -loren -cpi -idiots -shamrock -1646 -dutton -malabar -mustache -##endez -##ocytes -referencing -terminates -marche -yarmouth -##sop -acton -mated -seton -subtly -baptised -beige -extremes -jolted -kristina -telecast -##actic -safeguard -waldo -##baldi -##bular -endeavors -sloppy -subterranean -##ensburg -##itung -delicately -pigment -tq -##scu -1626 -##ound -collisions -coveted -herds -##personal -##meister -##nberger -chopra -##ricting -abnormalities -defective -galician -lucie -##dilly -alligator -likened -##genase -burundi -clears -complexion -derelict -deafening -diablo -fingered -champaign -dogg -enlist -isotope -labeling -mrna -##erre -brilliance -marvelous -##ayo -1652 -crawley -ether -footed -dwellers -deserts -hamish -rubs -warlock -skimmed -##lizer -870 -buick -embark -heraldic -irregularities -##ajan -kiara -##kulam -##ieg -antigen -kowalski -##lge -oakley -visitation -##mbit -vt -##suit -1570 -murderers -##miento -##rites -chimneys -##sling -condemn -custer -exchequer -havre -##ghi -fluctuations -##rations -dfb -hendricks -vaccines -##tarian -nietzsche -biking -juicy -##duced -brooding -scrolling -selangor -##ragan -352 -annum -boomed -seminole -sugarcane -##dna -departmental -dismissing -innsbruck -arteries -ashok -batavia -daze -kun -overtook -##rga -##tlan -beheaded -gaddafi -holm -electronically -faulty -galilee -fractures -kobayashi -##lized -gunmen -magma -aramaic -mala -eastenders -inference -messengers -bf -##qu -407 -bathrooms -##vere -1658 -flashbacks -ideally -misunderstood -##jali -##weather -mendez -##grounds -505 -uncanny -##iii -1709 -friendships -##nbc -sacrament -accommodated -reiterated -logistical -pebbles -thumped -##escence -administering -decrees -drafts -##flight -##cased -##tula -futuristic -picket -intimidation -winthrop -##fahan -interfered -339 -afar -francoise -morally -uta -cochin -croft -dwarfs -##bruck -##dents -##nami -biker -##hner -##meral -nano -##isen -##ometric -##pres -##ан -brightened -meek -parcels -securely -gunners -##jhl -##zko -agile -hysteria -##lten -##rcus -bukit -champs -chevy -cuckoo -leith -sadler -theologians -welded -##section -1663 -jj -plurality -xander -##rooms -##formed -shredded -temps -intimately -pau -tormented -##lok -##stellar -1618 -charred -ems -essen -##mmel -alarms -spraying -ascot -blooms -twinkle -##abia -##apes -internment -obsidian -##chaft -snoop -##dav -##ooping -malibu -##tension -quiver -##itia -hays -mcintosh -travers -walsall -##ffie -1623 -beverley -schwarz -plunging -structurally -m3 -rosenthal -vikram -##tsk -770 -ghz -##onda -##tiv -chalmers -groningen -pew -reckon -unicef -##rvis -55th -##gni -1651 -sulawesi -avila -cai -metaphysical -screwing -turbulence -##mberg -augusto -samba -56th -baffled -momentary -toxin -##urian -##wani -aachen -condoms -dali -steppe -##3d -##app -##oed -##year -adolescence -dauphin -electrically -inaccessible -microscopy -nikita -##ega -atv -##cel -##enter -##oles -##oteric -##ы -accountants -punishments -wrongly -bribes -adventurous -clinch -flinders -southland -##hem -##kata -gough -##ciency -lads -soared -##ה -undergoes -deformation -outlawed -rubbish -##arus -##mussen -##nidae -##rzburg -arcs -##ingdon -##tituted -1695 -wheelbase -wheeling -bombardier -campground -zebra -##lices -##oj -##bain -lullaby -##ecure -donetsk -wylie -grenada -##arding -##ης -squinting 
-eireann -opposes -##andra -maximal -runes -##broken -##cuting -##iface -##ror -##rosis -additive -britney -adultery -triggering -##drome -detrimental -aarhus -containment -jc -swapped -vichy -##ioms -madly -##oric -##rag -brant -##ckey -##trix -1560 -1612 -broughton -rustling -##stems -##uder -asbestos -mentoring -##nivorous -finley -leaps -##isan -apical -pry -slits -substitutes -##dict -intuitive -fantasia -insistent -unreasonable -##igen -##vna -domed -hannover -margot -ponder -##zziness -impromptu -jian -lc -rampage -stemming -##eft -andrey -gerais -whichever -amnesia -appropriated -anzac -clicks -modifying -ultimatum -cambrian -maids -verve -yellowstone -##mbs -conservatoire -##scribe -adherence -dinners -spectra -imperfect -mysteriously -sidekick -tatar -tuba -##aks -##ifolia -distrust -##athan -##zle -c2 -ronin -zac -##pse -celaena -instrumentalist -scents -skopje -##mbling -comical -compensated -vidal -condor -intersect -jingle -wavelengths -##urrent -mcqueen -##izzly -carp -weasel -422 -kanye -militias -postdoctoral -eugen -gunslinger -##ɛ -faux -hospice -##for -appalled -derivation -dwarves -##elis -dilapidated -##folk -astoria -philology -##lwyn -##otho -##saka -inducing -philanthropy -##bf -##itative -geek -markedly -sql -##yce -bessie -indices -rn -##flict -495 -frowns -resolving -weightlifting -tugs -cleric -contentious -1653 -mania -rms -##miya -##reate -##ruck -##tucket -bien -eels -marek -##ayton -##cence -discreet -unofficially -##ife -leaks -##bber -1705 -332 -dung -compressor -hillsborough -pandit -shillings -distal -##skin -381 -##tat -##you -nosed -##nir -mangrove -undeveloped -##idia -textures -##inho -##500 -##rise -ae -irritating -nay -amazingly -bancroft -apologetic -compassionate -kata -symphonies -##lovic -airspace -##lch -930 -gifford -precautions -fulfillment -sevilla -vulgar -martinique -##urities -looting -piccolo -tidy -##dermott -quadrant -armchair -incomes -mathematicians -stampede -nilsson -##inking -##scan -foo -quarterfinal -##ostal -shang -shouldered -squirrels -##owe -344 -vinegar -##bner -##rchy -##systems -delaying -##trics -ars -dwyer -rhapsody -sponsoring -##gration -bipolar -cinder -starters -##olio -##urst -421 -signage -##nty -aground -figurative -mons -acquaintances -duets -erroneously -soyuz -elliptic -recreated -##cultural -##quette -##ssed -##tma -##zcz -moderator -scares -##itaire -##stones -##udence -juniper -sighting -##just -##nsen -britten -calabria -ry -bop -cramer -forsyth -stillness -##л -airmen -gathers -unfit -##umber -##upt -taunting -##rip -seeker -streamlined -##bution -holster -schumann -tread -vox -##gano -##onzo -strive -dil -reforming -covent -newbury -predicting -##orro -decorate -tre -##puted -andover -ie -asahi -dept -dunkirk -gills -##tori -buren -huskies -##stis -##stov -abstracts -bets -loosen -##opa -1682 -yearning -##glio -##sir -berman -effortlessly -enamel -napoli -persist -##peration -##uez -attache -elisa -b1 -invitations -##kic -accelerating -reindeer -boardwalk -clutches -nelly -polka -starbucks -##kei -adamant -huey -lough -unbroken -adventurer -embroidery -inspecting -stanza -##ducted -naia -taluka -##pone -##roids -chases -deprivation -florian -##jing -##ppet -earthly -##lib -##ssee -colossal -foreigner -vet -freaks -patrice -rosewood -triassic -upstate -##pkins -dominates -ata -chants -ks -vo -##400 -##bley -##raya -##rmed -555 -agra -infiltrate -##ailing -##ilation -##tzer -##uppe -##werk -binoculars -enthusiast -fujian -squeak -##avs -abolitionist -almeida -boredom -hampstead -marsden -rations -##ands 
-inflated -334 -bonuses -rosalie -patna -##rco -329 -detachments -penitentiary -54th -flourishing -woolf -##dion -##etched -papyrus -##lster -##nsor -##toy -bobbed -dismounted -endelle -inhuman -motorola -tbs -wince -wreath -##ticus -hideout -inspections -sanjay -disgrace -infused -pudding -stalks -##urbed -arsenic -leases -##hyl -##rrard -collarbone -##waite -##wil -dowry -##bant -##edance -genealogical -nitrate -salamanca -scandals -thyroid -necessitated -##! -##" -### -##$ -##% -##& -##' -##( -##) -##* -##+ -##, -##- -##. -##/ -##: -##; -##< -##= -##> -##? -##@ -##[ -##\ -##] -##^ -##_ -##` -##{ -##| -##} -##~ -##¡ -##¢ -##£ -##¤ -##¥ -##¦ -##§ -##¨ -##© -##ª -##« -##¬ -##® -##± -##´ -##µ -##¶ -##· -##º -##» -##¼ -##¾ -##¿ -##æ -##ð -##÷ -##þ -##đ -##ħ -##ŋ -##œ -##ƒ -##ɐ -##ɑ -##ɒ -##ɔ -##ɕ -##ə -##ɡ -##ɣ -##ɨ -##ɪ -##ɫ -##ɬ -##ɯ -##ɲ -##ɴ -##ɹ -##ɾ -##ʀ -##ʁ -##ʂ -##ʃ -##ʉ -##ʊ -##ʋ -##ʌ -##ʎ -##ʐ -##ʑ -##ʒ -##ʔ -##ʰ -##ʲ -##ʳ -##ʷ -##ʸ -##ʻ -##ʼ -##ʾ -##ʿ -##ˈ -##ˡ -##ˢ -##ˣ -##ˤ -##β -##γ -##δ -##ε -##ζ -##θ -##κ -##λ -##μ -##ξ -##ο -##π -##ρ -##σ -##τ -##υ -##φ -##χ -##ψ -##ω -##б -##г -##д -##ж -##з -##м -##п -##с -##у -##ф -##х -##ц -##ч -##ш -##щ -##ъ -##э -##ю -##ђ -##є -##і -##ј -##љ -##њ -##ћ -##ӏ -##ա -##բ -##գ -##դ -##ե -##թ -##ի -##լ -##կ -##հ -##մ -##յ -##ն -##ո -##պ -##ս -##վ -##տ -##ր -##ւ -##ք -##־ -##א -##ב -##ג -##ד -##ו -##ז -##ח -##ט -##י -##ך -##כ -##ל -##ם -##מ -##ן -##נ -##ס -##ע -##ף -##פ -##ץ -##צ -##ק -##ר -##ש -##ת -##، -##ء -##ب -##ت -##ث -##ج -##ح -##خ -##ذ -##ز -##س -##ش -##ص -##ض -##ط -##ظ -##ع -##غ -##ـ -##ف -##ق -##ك -##و -##ى -##ٹ -##پ -##چ -##ک -##گ -##ں -##ھ -##ہ -##ے -##अ -##आ -##उ -##ए -##क -##ख -##ग -##च -##ज -##ट -##ड -##ण -##त -##थ -##द -##ध -##न -##प -##ब -##भ -##म -##य -##र -##ल -##व -##श -##ष -##स -##ह -##ा -##ि -##ी -##ो -##। -##॥ -##ং -##অ -##আ -##ই -##উ -##এ -##ও -##ক -##খ -##গ -##চ -##ছ -##জ -##ট -##ড -##ণ -##ত -##থ -##দ -##ধ -##ন -##প -##ব -##ভ -##ম -##য -##র -##ল -##শ -##ষ -##স -##হ -##া -##ি -##ী -##ে -##க -##ச -##ட -##த -##ந -##ன -##ப -##ம -##ய -##ர -##ல -##ள -##வ -##ா -##ி -##ு -##ே -##ை -##ನ -##ರ -##ಾ -##ක -##ය -##ර -##ල -##ව -##ා -##ก -##ง -##ต -##ท -##น -##พ -##ม -##ย -##ร -##ล -##ว -##ส -##อ -##า -##เ -##་ -##། -##ག -##ང -##ད -##ན -##པ -##བ -##མ -##འ -##ར -##ལ -##ས -##မ -##ა -##ბ -##გ -##დ -##ე -##ვ -##თ -##ი -##კ -##ლ -##მ -##ნ -##ო -##რ -##ს -##ტ -##უ -##ᄀ -##ᄂ -##ᄃ -##ᄅ -##ᄆ -##ᄇ -##ᄉ -##ᄊ -##ᄋ -##ᄌ -##ᄎ -##ᄏ -##ᄐ -##ᄑ -##ᄒ -##ᅡ -##ᅢ -##ᅥ -##ᅦ -##ᅧ -##ᅩ -##ᅪ -##ᅭ -##ᅮ -##ᅯ -##ᅲ -##ᅳ -##ᅴ -##ᅵ -##ᆨ -##ᆫ -##ᆯ -##ᆷ -##ᆸ -##ᆼ -##ᴬ -##ᴮ -##ᴰ -##ᴵ -##ᴺ -##ᵀ -##ᵃ -##ᵇ -##ᵈ -##ᵉ -##ᵍ -##ᵏ -##ᵐ -##ᵒ -##ᵖ -##ᵗ -##ᵘ -##ᵣ -##ᵤ -##ᵥ -##ᶜ -##ᶠ -##‐ -##‑ -##‒ -##– -##— -##― -##‖ -##‘ -##’ -##‚ -##“ -##” -##„ -##† -##‡ -##• -##… -##‰ -##′ -##″ -##› -##‿ -##⁄ -##⁰ -##ⁱ -##⁴ -##⁵ -##⁶ -##⁷ -##⁸ -##⁹ -##⁻ -##ⁿ -##₅ -##₆ -##₇ -##₈ -##₉ -##₊ -##₍ -##₎ -##ₐ -##ₑ -##ₒ -##ₓ -##ₕ -##ₖ -##ₗ -##ₘ -##ₚ -##ₛ -##ₜ -##₤ -##₩ -##€ -##₱ -##₹ -##ℓ -##№ -##ℝ -##™ -##⅓ -##⅔ -##← -##↑ -##→ -##↓ -##↔ -##↦ -##⇄ -##⇌ -##⇒ -##∂ -##∅ -##∆ -##∇ -##∈ -##∗ -##∘ -##√ -##∞ -##∧ -##∨ -##∩ -##∪ -##≈ -##≡ -##≤ -##≥ -##⊂ -##⊆ -##⊕ -##⊗ -##⋅ -##─ -##│ -##■ -##▪ -##● -##★ -##☆ -##☉ -##♠ -##♣ -##♥ -##♦ -##♯ -##⟨ -##⟩ -##ⱼ -##⺩ -##⺼ -##⽥ -##、 -##。 -##〈 -##〉 -##《 -##》 -##「 -##」 -##『 -##』 -##〜 -##あ -##い -##う -##え -##お -##か -##き -##く -##け -##こ -##さ -##し -##す -##せ -##そ -##た -##ち -##っ -##つ -##て -##と -##な -##に -##ぬ -##ね -##の -##は -##ひ -##ふ -##へ -##ほ -##ま -##み -##む -##め -##も -##や -##ゆ -##よ -##ら -##り -##る -##れ -##ろ -##を -##ん -##ァ -##ア -##ィ -##イ -##ウ -##ェ -##エ -##オ -##カ -##キ -##ク -##ケ 
-##コ -##サ -##シ -##ス -##セ -##タ -##チ -##ッ -##ツ -##テ -##ト -##ナ -##ニ -##ノ -##ハ -##ヒ -##フ -##ヘ -##ホ -##マ -##ミ -##ム -##メ -##モ -##ャ -##ュ -##ョ -##ラ -##リ -##ル -##レ -##ロ -##ワ -##ン -##・ -##ー -##一 -##三 -##上 -##下 -##不 -##世 -##中 -##主 -##久 -##之 -##也 -##事 -##二 -##五 -##井 -##京 -##人 -##亻 -##仁 -##介 -##代 -##仮 -##伊 -##会 -##佐 -##侍 -##保 -##信 -##健 -##元 -##光 -##八 -##公 -##内 -##出 -##分 -##前 -##劉 -##力 -##加 -##勝 -##北 -##区 -##十 -##千 -##南 -##博 -##原 -##口 -##古 -##史 -##司 -##合 -##吉 -##同 -##名 -##和 -##囗 -##四 -##国 -##國 -##土 -##地 -##坂 -##城 -##堂 -##場 -##士 -##夏 -##外 -##大 -##天 -##太 -##夫 -##奈 -##女 -##子 -##学 -##宀 -##宇 -##安 -##宗 -##定 -##宣 -##宮 -##家 -##宿 -##寺 -##將 -##小 -##尚 -##山 -##岡 -##島 -##崎 -##川 -##州 -##巿 -##帝 -##平 -##年 -##幸 -##广 -##弘 -##張 -##彳 -##後 -##御 -##德 -##心 -##忄 -##志 -##忠 -##愛 -##成 -##我 -##戦 -##戸 -##手 -##扌 -##政 -##文 -##新 -##方 -##日 -##明 -##星 -##春 -##昭 -##智 -##曲 -##書 -##月 -##有 -##朝 -##木 -##本 -##李 -##村 -##東 -##松 -##林 -##森 -##楊 -##樹 -##橋 -##歌 -##止 -##正 -##武 -##比 -##氏 -##民 -##水 -##氵 -##氷 -##永 -##江 -##沢 -##河 -##治 -##法 -##海 -##清 -##漢 -##瀬 -##火 -##版 -##犬 -##王 -##生 -##田 -##男 -##疒 -##発 -##白 -##的 -##皇 -##目 -##相 -##省 -##真 -##石 -##示 -##社 -##神 -##福 -##禾 -##秀 -##秋 -##空 -##立 -##章 -##竹 -##糹 -##美 -##義 -##耳 -##良 -##艹 -##花 -##英 -##華 -##葉 -##藤 -##行 -##街 -##西 -##見 -##訁 -##語 -##谷 -##貝 -##貴 -##車 -##軍 -##辶 -##道 -##郎 -##郡 -##部 -##都 -##里 -##野 -##金 -##鈴 -##镇 -##長 -##門 -##間 -##阝 -##阿 -##陳 -##陽 -##雄 -##青 -##面 -##風 -##食 -##香 -##馬 -##高 -##龍 -##龸 -##fi -##fl -##! -##( -##) -##, -##- -##. -##/ -##: -##? -##~ diff --git a/crawl4ai/scraper/__init__.py b/crawl4ai/scraper/__init__.py index 1138a91..5af7ad6 100644 --- a/crawl4ai/scraper/__init__.py +++ b/crawl4ai/scraper/__init__.py @@ -1,3 +1,5 @@ from .async_web_scraper import AsyncWebScraper from .bfs_scraper_strategy import BFSScraperStrategy -from .filters import URLFilter, FilterChain, URLPatternFilter, ContentTypeFilter \ No newline at end of file +from .filters import URLFilter, FilterChain, URLPatternFilter, ContentTypeFilter, DomainFilter +from .scorers import KeywordRelevanceScorer, PathDepthScorer, FreshnessScorer, CompositeScorer +from .scraper_strategy import ScraperStrategy \ No newline at end of file diff --git a/crawl4ai/scraper/bfs_scraper_strategy.py b/crawl4ai/scraper/bfs_scraper_strategy.py index 7293500..3a6d09a 100644 --- a/crawl4ai/scraper/bfs_scraper_strategy.py +++ b/crawl4ai/scraper/bfs_scraper_strategy.py @@ -1,10 +1,9 @@ -from abc import ABC, abstractmethod -from typing import Union, AsyncGenerator, Optional, Dict, Set +from typing import AsyncGenerator, Optional, Dict, Set from dataclasses import dataclass from datetime import datetime import asyncio import logging -from urllib.parse import urljoin, urlparse, urlunparse +from urllib.parse import urlparse from urllib.robotparser import RobotFileParser import validators import time @@ -12,10 +11,11 @@ from tenacity import retry, stop_after_attempt, wait_exponential from collections import defaultdict -from .models import ScraperResult, CrawlResult +from .models import CrawlResult from .filters import FilterChain from .scorers import URLScorer from ..async_webcrawler import AsyncWebCrawler +from .scraper_strategy import ScraperStrategy @dataclass class CrawlStats: @@ -28,30 +28,6 @@ class CrawlStats: current_depth: int = 0 robots_blocked: int = 0 -class ScraperStrategy(ABC): - """Base class for scraping strategies""" - - @abstractmethod - async def ascrape( - self, - url: str, - crawler: AsyncWebCrawler, - parallel_processing: bool = True, - stream: bool = False - ) -> Union[AsyncGenerator[CrawlResult, None], ScraperResult]: - """Abstract 
-        """Abstract method for scraping implementation"""
-        pass
-
-    @abstractmethod
-    async def can_process_url(self, url: str) -> bool:
-        """Check if URL can be processed based on strategy rules"""
-        pass
-
-    @abstractmethod
-    async def shutdown(self):
-        """Clean up resources used by the strategy"""
-        pass
-
 class BFSScraperStrategy(ScraperStrategy):
     """Breadth-First Search scraping strategy with politeness controls"""
@@ -60,6 +36,7 @@ def __init__(
         self,
         max_depth: int,
         filter_chain: FilterChain,
         url_scorer: URLScorer,
+        process_external_links: bool = False,
         max_concurrent: int = 5,
         min_crawl_delay: int = 1,
         timeout: int = 30,
@@ -76,7 +53,7 @@ def __init__(
         # Crawl control
         self.stats = CrawlStats(start_time=datetime.now())
         self._cancel_event = asyncio.Event()
-        self.process_external_links = False
+        self.process_external_links = process_external_links
 
         # Rate limiting and politeness
         self.rate_limiter = AsyncLimiter(1, 1)
@@ -84,7 +61,7 @@ def __init__(
         self.robot_parsers: Dict[str, RobotFileParser] = {}
         self.domain_queues: Dict[str, asyncio.Queue] = defaultdict(asyncio.Queue)
 
-    async def can_process_url(self, url: str) -> bool:
+    async def can_process_url(self, url: str, depth: int) -> bool:
         """Check if URL can be processed based on robots.txt and filters
         This is our gatekeeper method that determines if a URL should be processed. It:
         - Validates URL format using the validators library
@@ -103,7 +80,11 @@ async def can_process_url(self, url: str) -> bool:
             self.logger.info(f"Blocked by robots.txt: {url}")
             return False
 
-        return self.filter_chain.apply(url)
+        # Apply the filter chain if it's not the start page
+        if depth != 0 and not self.filter_chain.apply(url):
+            return False
+
+        return True
 
     async def _get_robot_parser(self, url: str) -> Optional[RobotFileParser]:
         """Get or create robots.txt parser for domain.
@@ -135,11 +116,15 @@ async def _crawl_with_retry(
     ) -> CrawlResult:
         """Crawl URL with retry logic"""
         try:
-            async with asyncio.timeout(self.timeout):
-                return await crawler.arun(url)
+            return await asyncio.wait_for(crawler.arun(url), timeout=self.timeout)
         except asyncio.TimeoutError:
             self.logger.error(f"Timeout crawling {url}")
             raise
+        except Exception as e:
+            # Catch any other exceptions that may cause retries
+            self.logger.error(f"Error crawling {url}: {e}")
+            raise
+
     async def process_url(
         self,
@@ -165,7 +150,7 @@ async def process_url(
         if self._cancel_event.is_set():
             return None
 
-        if not await self.can_process_url(url):
+        if not await self.can_process_url(url, depth):
             self.stats.urls_skipped += 1
             return None
@@ -181,16 +166,14 @@ async def process_url(
             async with self.rate_limiter:
                 result = await self._crawl_with_retry(crawler, url)
                 self.stats.urls_processed += 1
+                # Process links
+                await self._process_links(result, url, depth, queue, visited, depths)
+                return result
         except Exception as e:
             self.logger.error(f"Error crawling {url}: {e}")
             self.stats.urls_failed += 1
             return None
 
-        # Process links
-        await self._process_links(result, url, depth, queue, visited, depths)
-
-        return result
-
     async def _process_links(
         self,
         result: CrawlResult,
@@ -210,25 +193,26 @@ async def _process_links(
         Adds valid URLs to the queue
         Updates maximum depth statistics
         """
-        links_ro_process = result.links["internal"]
         if self.process_external_links:
-            links_ro_process += result.links["external"]
-        for link_type in links_ro_process:
-            for link in result.links[link_type]:
-                url = link['href']
-                # url = urljoin(source_url, link['href'])
-                # url = urlunparse(urlparse(url)._replace(fragment=""))
-
-                if url not in visited and await self.can_process_url(url):
-                    new_depth = depths[source_url] + 1
-                    if new_depth <= self.max_depth:
+        links_to_process = result.links["internal"]
         if self.process_external_links:
+            links_to_process += result.links["external"]
+        for link in links_to_process:
+            url = link['href']
+            if url not in visited:
+                new_depth = depths[source_url] + 1
+                if new_depth <= self.max_depth:
+                    if self.url_scorer:
                         score = self.url_scorer.score(url)
-                        await queue.put((score, new_depth, url))
-                        depths[url] = new_depth
-                        self.stats.total_depth_reached = max(
-                            self.stats.total_depth_reached,
-                            new_depth
-                        )
+                    else:
+                        # When no url_scorer is provided all urls will have the same score of 0.
+                        # Therefore they will be processed in FIFO order as per URL depth
+                        score = 0
+                    await queue.put((score, new_depth, url))
+                    depths[url] = new_depth
+                    self.stats.total_depth_reached = max(
+                        self.stats.total_depth_reached,
+                        new_depth
+                    )
 
     async def ascrape(
         self,
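To make the reworked BFS strategy easier to picture, here is a rough usage sketch. Only `BFSScraperStrategy`'s own parameters (`max_depth`, `filter_chain`, `url_scorer`, and the new `process_external_links`) and the `crawl4ai.scraper` exports are taken from this diff; the `FilterChain`, `DomainFilter`, and `KeywordRelevanceScorer` constructor arguments are assumptions, not confirmed API.

```python
import asyncio
from crawl4ai import AsyncWebCrawler
from crawl4ai.scraper import (
    BFSScraperStrategy, FilterChain, DomainFilter, KeywordRelevanceScorer
)

async def main():
    strategy = BFSScraperStrategy(
        max_depth=2,
        # Hypothetical filter/scorer setup; their constructor signatures are assumed.
        filter_chain=FilterChain([DomainFilter(allowed_domains=["example.com"])]),
        url_scorer=KeywordRelevanceScorer(keywords=["crawl", "async"]),
        process_external_links=False,  # new parameter: external links stay opt-in
    )
    async with AsyncWebCrawler() as crawler:
        # With stream left at its default (False), ascrape should return a ScraperResult summary.
        result = await strategy.ascrape("https://example.com", crawler)
        print(result)

asyncio.run(main())
```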
""" + pass + + @abstractmethod + async def can_process_url(self, url: str, depth: int) -> bool: + """Check if URL can be processed based on strategy rules""" + pass + + @abstractmethod + async def shutdown(self): + """Clean up resources used by the strategy""" pass \ No newline at end of file diff --git a/crawl4ai/tools.py b/crawl4ai/tools.py new file mode 100644 index 0000000..ff36b53 --- /dev/null +++ b/crawl4ai/tools.py @@ -0,0 +1,34 @@ +import time +import cProfile +import pstats +from functools import wraps + +def profile_and_time(func): + @wraps(func) + def wrapper(self, *args, **kwargs): + # Start timer + start_time = time.perf_counter() + + # Setup profiler + profiler = cProfile.Profile() + profiler.enable() + + # Run function + result = func(self, *args, **kwargs) + + # Stop profiler + profiler.disable() + + # Calculate elapsed time + elapsed_time = time.perf_counter() - start_time + + # Print timing + print(f"[PROFILER] Scraping completed in {elapsed_time:.2f} seconds") + + # Print profiling stats + stats = pstats.Stats(profiler) + stats.sort_stats('cumulative') # Sort by cumulative time + stats.print_stats(20) # Print top 20 time-consuming functions + + return result + return wrapper \ No newline at end of file diff --git a/crawl4ai/train.py b/crawl4ai/train.py deleted file mode 100644 index f7e7c1a..0000000 --- a/crawl4ai/train.py +++ /dev/null @@ -1,146 +0,0 @@ -import spacy -from spacy.training import Example -import random -import nltk -from nltk.corpus import reuters -import torch - -def save_spacy_model_as_torch(nlp, model_dir="models/reuters"): - # Extract the TextCategorizer component - textcat = nlp.get_pipe("textcat_multilabel") - - # Convert the weights to a PyTorch state dictionary - state_dict = {name: torch.tensor(param.data) for name, param in textcat.model.named_parameters()} - - # Save the state dictionary - torch.save(state_dict, f"{model_dir}/model_weights.pth") - - # Extract and save the vocabulary - vocab = extract_vocab(nlp) - with open(f"{model_dir}/vocab.txt", "w") as vocab_file: - for word, idx in vocab.items(): - vocab_file.write(f"{word}\t{idx}\n") - - print(f"Model weights and vocabulary saved to: {model_dir}") - -def extract_vocab(nlp): - # Extract vocabulary from the SpaCy model - vocab = {word: i for i, word in enumerate(nlp.vocab.strings)} - return vocab - -nlp = spacy.load("models/reuters") -save_spacy_model_as_torch(nlp, model_dir="models") - -def train_and_save_reuters_model(model_dir="models/reuters"): - # Ensure the Reuters corpus is downloaded - nltk.download('reuters') - nltk.download('punkt') - if not reuters.fileids(): - print("Reuters corpus not found.") - return - - # Load a blank English spaCy model - nlp = spacy.blank("en") - - # Create a TextCategorizer with the ensemble model for multi-label classification - textcat = nlp.add_pipe("textcat_multilabel") - - # Add labels to text classifier - for label in reuters.categories(): - textcat.add_label(label) - - # Prepare training data - train_examples = [] - for fileid in reuters.fileids(): - categories = reuters.categories(fileid) - text = reuters.raw(fileid) - cats = {label: label in categories for label in reuters.categories()} - # Prepare spacy Example objects - doc = nlp.make_doc(text) - example = Example.from_dict(doc, {'cats': cats}) - train_examples.append(example) - - # Initialize the text categorizer with the example objects - nlp.initialize(lambda: train_examples) - - # Train the model - random.seed(1) - spacy.util.fix_random_seed(1) - for i in range(5): # Adjust iterations for 
diff --git a/crawl4ai/train.py b/crawl4ai/train.py
deleted file mode 100644
index f7e7c1a..0000000
--- a/crawl4ai/train.py
+++ /dev/null
@@ -1,146 +0,0 @@
-import spacy
-from spacy.training import Example
-import random
-import nltk
-from nltk.corpus import reuters
-import torch
-
-def save_spacy_model_as_torch(nlp, model_dir="models/reuters"):
-    # Extract the TextCategorizer component
-    textcat = nlp.get_pipe("textcat_multilabel")
-
-    # Convert the weights to a PyTorch state dictionary
-    state_dict = {name: torch.tensor(param.data) for name, param in textcat.model.named_parameters()}
-
-    # Save the state dictionary
-    torch.save(state_dict, f"{model_dir}/model_weights.pth")
-
-    # Extract and save the vocabulary
-    vocab = extract_vocab(nlp)
-    with open(f"{model_dir}/vocab.txt", "w") as vocab_file:
-        for word, idx in vocab.items():
-            vocab_file.write(f"{word}\t{idx}\n")
-
-    print(f"Model weights and vocabulary saved to: {model_dir}")
-
-def extract_vocab(nlp):
-    # Extract vocabulary from the SpaCy model
-    vocab = {word: i for i, word in enumerate(nlp.vocab.strings)}
-    return vocab
-
-nlp = spacy.load("models/reuters")
-save_spacy_model_as_torch(nlp, model_dir="models")
-
-def train_and_save_reuters_model(model_dir="models/reuters"):
-    # Ensure the Reuters corpus is downloaded
-    nltk.download('reuters')
-    nltk.download('punkt')
-    if not reuters.fileids():
-        print("Reuters corpus not found.")
-        return
-
-    # Load a blank English spaCy model
-    nlp = spacy.blank("en")
-
-    # Create a TextCategorizer with the ensemble model for multi-label classification
-    textcat = nlp.add_pipe("textcat_multilabel")
-
-    # Add labels to text classifier
-    for label in reuters.categories():
-        textcat.add_label(label)
-
-    # Prepare training data
-    train_examples = []
-    for fileid in reuters.fileids():
-        categories = reuters.categories(fileid)
-        text = reuters.raw(fileid)
-        cats = {label: label in categories for label in reuters.categories()}
-        # Prepare spacy Example objects
-        doc = nlp.make_doc(text)
-        example = Example.from_dict(doc, {'cats': cats})
-        train_examples.append(example)
-
-    # Initialize the text categorizer with the example objects
-    nlp.initialize(lambda: train_examples)
-
-    # Train the model
-    random.seed(1)
-    spacy.util.fix_random_seed(1)
-    for i in range(5):  # Adjust iterations for better accuracy
-        random.shuffle(train_examples)
-        losses = {}
-        # Create batches of data
-        batches = spacy.util.minibatch(train_examples, size=8)
-        for batch in batches:
-            nlp.update(batch, drop=0.2, losses=losses)
-        print(f"Losses at iteration {i}: {losses}")
-
-    # Save the trained model
-    nlp.to_disk(model_dir)
-    print(f"Model saved to: {model_dir}")
-
-def train_model(model_dir, additional_epochs=0):
-    # Load the model if it exists, otherwise start with a blank model
-    try:
-        nlp = spacy.load(model_dir)
-        print("Model loaded from disk.")
-    except IOError:
-        print("No existing model found. Starting with a new model.")
-        nlp = spacy.blank("en")
-        textcat = nlp.add_pipe("textcat_multilabel")
-        for label in reuters.categories():
-            textcat.add_label(label)
-
-    # Prepare training data
-    train_examples = []
-    for fileid in reuters.fileids():
-        categories = reuters.categories(fileid)
-        text = reuters.raw(fileid)
-        cats = {label: label in categories for label in reuters.categories()}
-        doc = nlp.make_doc(text)
-        example = Example.from_dict(doc, {'cats': cats})
-        train_examples.append(example)
-
-    # Initialize the model if it was newly created
-    if 'textcat_multilabel' not in nlp.pipe_names:
-        nlp.initialize(lambda: train_examples)
-    else:
-        print("Continuing training with existing model.")
-
-    # Train the model
-    random.seed(1)
-    spacy.util.fix_random_seed(1)
-    num_epochs = 5 + additional_epochs
-    for i in range(num_epochs):
-        random.shuffle(train_examples)
-        losses = {}
-        batches = spacy.util.minibatch(train_examples, size=8)
-        for batch in batches:
-            nlp.update(batch, drop=0.2, losses=losses)
-        print(f"Losses at iteration {i}: {losses}")
-
-    # Save the trained model
-    nlp.to_disk(model_dir)
-    print(f"Model saved to: {model_dir}")
-
-def load_model_and_predict(model_dir, text, tok_k = 3):
-    # Load the trained model from the specified directory
-    nlp = spacy.load(model_dir)
-
-    # Process the text with the loaded model
-    doc = nlp(text)
-
-    # gee top 3 categories
-    top_categories = sorted(doc.cats.items(), key=lambda x: x[1], reverse=True)[:tok_k]
-    print(f"Top {tok_k} categories:")
-
-    return top_categories
-
-if __name__ == "__main__":
-    train_and_save_reuters_model()
-    train_model("models/reuters", additional_epochs=5)
-    model_directory = "reuters_model_10"
-    print(reuters.categories())
-    example_text = "Apple Inc. is reportedly buying a startup for $1 billion"
-    r =load_model_and_predict(model_directory, example_text)
-    print(r)
\ No newline at end of file
is reportedly buying a startup for $1 billion" - r =load_model_and_predict(model_directory, example_text) - print(r) \ No newline at end of file diff --git a/crawl4ai/utils.py b/crawl4ai/utils.py index efb5d79..b07562d 100644 --- a/crawl4ai/utils.py +++ b/crawl4ai/utils.py @@ -1,13 +1,12 @@ import time from concurrent.futures import ThreadPoolExecutor, as_completed from bs4 import BeautifulSoup, Comment, element, Tag, NavigableString -import html2text import json import html import re import os import platform -from html2text import HTML2Text +from .html2text import HTML2Text from .prompts import PROMPT_EXTRACT_BLOCKS from .config import * from pathlib import Path @@ -15,6 +14,97 @@ from urllib.parse import urljoin import requests from requests.exceptions import InvalidSchema +import hashlib +from typing import Optional, Tuple, Dict, Any +import xxhash + + +from .html2text import HTML2Text +class CustomHTML2Text(HTML2Text): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.inside_pre = False + self.inside_code = False + self.preserve_tags = set()  # Set of tags to preserve + self.current_preserved_tag = None + self.preserved_content = [] + self.preserve_depth = 0 + + # Configuration options + self.skip_internal_links = False + self.single_line_break = False + self.mark_code = False + self.include_sup_sub = False + self.body_width = 0 + self.ignore_mailto_links = True + self.ignore_links = False + self.escape_backslash = False + self.escape_dot = False + self.escape_plus = False + self.escape_dash = False + self.escape_snob = False + + def update_params(self, **kwargs): + """Update parameters and set preserved tags.""" + for key, value in kwargs.items(): + if key == 'preserve_tags': + self.preserve_tags = set(value) + else: + setattr(self, key, value) + + def handle_tag(self, tag, attrs, start): + # Handle preserved tags + if tag in self.preserve_tags: + if start: + if self.preserve_depth == 0: + self.current_preserved_tag = tag + self.preserved_content = [] + # Format opening tag with attributes + attr_str = ''.join(f' {k}="{v}"' for k, v in attrs.items() if v is not None) + self.preserved_content.append(f'<{tag}{attr_str}>') + self.preserve_depth += 1 + return + else: + self.preserve_depth -= 1 + if self.preserve_depth == 0: + self.preserved_content.append(f'</{tag}>') + # Output the preserved HTML block with proper spacing + preserved_html = ''.join(self.preserved_content) + self.o('\n' + preserved_html + '\n') + self.current_preserved_tag = None + return + + # If we're inside a preserved tag, collect all content + if self.preserve_depth > 0: + if start: + # Format nested tags with attributes + attr_str = ''.join(f' {k}="{v}"' for k, v in attrs.items() if v is not None) + self.preserved_content.append(f'<{tag}{attr_str}>') + else: + self.preserved_content.append(f'</{tag}>') + return + + # Handle pre tags + if tag == 'pre': + if start: + self.o('```\n') + self.inside_pre = True + else: + self.o('\n```') + self.inside_pre = False + # elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: + # pass + else: + super().handle_tag(tag, attrs, start) + + def handle_data(self, data, entity_char=False): + """Override handle_data to capture content within preserved tags.""" + if self.preserve_depth > 0: + self.preserved_content.append(data) + return + super().handle_data(data, entity_char) + + class InvalidCSSSelectorError(Exception): pass @@ -61,7 +151,7 @@ class MEMORYSTATUSEX(ctypes.Structure): raise OSError("Unsupported operating system") def get_home_folder(): - home_folder =
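The `CustomHTML2Text` class introduced in `crawl4ai/utils.py` above converts HTML to markdown while emitting any tag listed in `preserve_tags` as a raw HTML block. A minimal sketch of how it could be driven directly, assuming the class is exposed from `crawl4ai.utils` as shown; the sample HTML string is made up for illustration:

```python
# Minimal sketch, assuming crawl4ai.utils exposes CustomHTML2Text as added above.
from crawl4ai.utils import CustomHTML2Text

converter = CustomHTML2Text()
# Tags listed in preserve_tags are kept as raw HTML blocks in the markdown output.
converter.update_params(preserve_tags=["table"], ignore_links=True)

html = "<h1>Pricing</h1><table><tr><td>GPT-4</td><td>$10 / 1M tokens</td></tr></table>"
print(converter.handle(html))  # the heading becomes markdown; the <table> block is kept as HTML
```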
os.path.join(Path.home(), ".crawl4ai") + home_folder = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home())), ".crawl4ai") os.makedirs(home_folder, exist_ok=True) os.makedirs(f"{home_folder}/cache", exist_ok=True) os.makedirs(f"{home_folder}/models", exist_ok=True) @@ -179,12 +269,25 @@ def escape_json_string(s): return s -class CustomHTML2Text(HTML2Text): +class CustomHTML2Text_v0(HTML2Text): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - self.ignore_links = True self.inside_pre = False self.inside_code = False + + self.skip_internal_links = False + self.single_line_break = False + self.mark_code = False + self.include_sup_sub = False + self.body_width = 0 + self.ignore_mailto_links = True + self.ignore_links = False + self.escape_backslash = False + self.escape_dot = False + self.escape_plus = False + self.escape_dash = False + self.escape_snob = False + def handle_tag(self, tag, attrs, start): if tag == 'pre': @@ -194,6 +297,10 @@ def handle_tag(self, tag, attrs, start): else: self.o('\n```') self.inside_pre = False + elif tag in ["h1", "h2", "h3", "h4", "h5", "h6"]: + pass + + # elif tag == 'code' and not self.inside_pre: # if start: # if not self.inside_pre: @@ -690,10 +797,13 @@ def flatten_nested_elements(node): body = flatten_nested_elements(body) base64_pattern = re.compile(r'data:image/[^;]+;base64,([^"]+)') for img in imgs: - src = img.get('src', '') - if base64_pattern.match(src): - # Replace base64 data with empty string - img['src'] = base64_pattern.sub('', src) + try: + src = img.get('src', '') + if base64_pattern.match(src): + img['src'] = base64_pattern.sub('', src) + except: + pass + cleaned_html = str(body).replace('\n\n', '\n').replace(' ', ' ') cleaned_html = sanitize_html(cleaned_html) @@ -717,46 +827,54 @@ def flatten_nested_elements(node): 'metadata': meta } -def extract_metadata(html, soup = None): +def extract_metadata(html, soup=None): metadata = {} - if not html: - return metadata + if not html and not soup: + return {} - # Parse HTML content with BeautifulSoup if not soup: - soup = BeautifulSoup(html, 'html.parser') - + soup = BeautifulSoup(html, 'lxml') + + head = soup.head + if not head: + return metadata + # Title - title_tag = soup.find('title') - metadata['title'] = title_tag.string if title_tag else None + title_tag = head.find('title') + metadata['title'] = title_tag.string.strip() if title_tag and title_tag.string else None # Meta description - description_tag = soup.find('meta', attrs={'name': 'description'}) - metadata['description'] = description_tag['content'] if description_tag else None + description_tag = head.find('meta', attrs={'name': 'description'}) + metadata['description'] = description_tag.get('content', '').strip() if description_tag else None # Meta keywords - keywords_tag = soup.find('meta', attrs={'name': 'keywords'}) - metadata['keywords'] = keywords_tag['content'] if keywords_tag else None + keywords_tag = head.find('meta', attrs={'name': 'keywords'}) + metadata['keywords'] = keywords_tag.get('content', '').strip() if keywords_tag else None # Meta author - author_tag = soup.find('meta', attrs={'name': 'author'}) - metadata['author'] = author_tag['content'] if author_tag else None + author_tag = head.find('meta', attrs={'name': 'author'}) + metadata['author'] = author_tag.get('content', '').strip() if author_tag else None # Open Graph metadata - og_tags = soup.find_all('meta', attrs={'property': lambda value: value and value.startswith('og:')}) + og_tags = 
head.find_all('meta', attrs={'property': re.compile(r'^og:')}) for tag in og_tags: - property_name = tag['property'] - metadata[property_name] = tag['content'] + property_name = tag.get('property', '').strip() + content = tag.get('content', '').strip() + if property_name and content: + metadata[property_name] = content # Twitter Card metadata - twitter_tags = soup.find_all('meta', attrs={'name': lambda value: value and value.startswith('twitter:')}) + twitter_tags = head.find_all('meta', attrs={'name': re.compile(r'^twitter:')}) for tag in twitter_tags: - property_name = tag['name'] - metadata[property_name] = tag['content'] - + property_name = tag.get('name', '').strip() + content = tag.get('content', '').strip() + if property_name and content: + metadata[property_name] = content + return metadata + def extract_xml_tags(string): tags = re.findall(r'<(\w+)>', string) return list(set(tags)) @@ -964,4 +1082,145 @@ def format_html(html_string): soup = BeautifulSoup(html_string, 'html.parser') return soup.prettify() +def normalize_url(href, base_url): + """Normalize URLs to ensure consistent format""" + from urllib.parse import urljoin, urlparse + + # Parse base URL to get components + parsed_base = urlparse(base_url) + if not parsed_base.scheme or not parsed_base.netloc: + raise ValueError(f"Invalid base URL format: {base_url}") + + # Use urljoin to handle all cases + normalized = urljoin(base_url, href.strip()) + return normalized + +def normalize_url_tmp(href, base_url): + """Normalize URLs to ensure consistent format""" + # Extract protocol and domain from base URL + try: + base_parts = base_url.split('/') + protocol = base_parts[0] + domain = base_parts[2] + except IndexError: + raise ValueError(f"Invalid base URL format: {base_url}") + + # Handle special protocols + special_protocols = {'mailto:', 'tel:', 'ftp:', 'file:', 'data:', 'javascript:'} + if any(href.lower().startswith(proto) for proto in special_protocols): + return href.strip() + + # Handle anchor links + if href.startswith('#'): + return f"{base_url}{href}" + + # Handle protocol-relative URLs + if href.startswith('//'): + return f"{protocol}{href}" + + # Handle root-relative URLs + if href.startswith('/'): + return f"{protocol}//{domain}{href}" + + # Handle relative URLs + if not href.startswith(('http://', 'https://')): + # Remove leading './' if present + href = href.lstrip('./') + return f"{protocol}//{domain}/{href}" + + return href.strip() + +def is_external_url(url, base_domain): + """Determine if a URL is external""" + special_protocols = {'mailto:', 'tel:', 'ftp:', 'file:', 'data:', 'javascript:'} + if any(url.lower().startswith(proto) for proto in special_protocols): + return True + + try: + # Handle URLs with protocol + if url.startswith(('http://', 'https://')): + url_domain = url.split('/')[2] + return base_domain.lower() not in url_domain.lower() + except IndexError: + return False + + return False +def clean_tokens(tokens: list[str]) -> list[str]: + # Set of tokens to remove + noise = {'ccp', 'up', '↑', '▲', '⬆️', 'a', 'an', 'at', 'by', 'in', 'of', 'on', 'to', 'the'} + + STOP_WORDS = { + 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', + 'has', 'he', 'in', 'is', 'it', 'its', 'of', 'on', 'that', 'the', + 'to', 'was', 'were', 'will', 'with', + + # Pronouns + 'i', 'you', 'he', 'she', 'it', 'we', 'they', + 'me', 'him', 'her', 'us', 'them', + 'my', 'your', 'his', 'her', 'its', 'our', 'their', + 'mine', 'yours', 'hers', 'ours', 'theirs', + 'myself', 'yourself', 'himself', 'herself', 'itself', 
'ourselves', 'themselves', + + # Common verbs + 'am', 'is', 'are', 'was', 'were', 'be', 'been', 'being', + 'have', 'has', 'had', 'having', 'do', 'does', 'did', 'doing', + + # Prepositions + 'about', 'above', 'across', 'after', 'against', 'along', 'among', 'around', + 'at', 'before', 'behind', 'below', 'beneath', 'beside', 'between', 'beyond', + 'by', 'down', 'during', 'except', 'for', 'from', 'in', 'inside', 'into', + 'near', 'of', 'off', 'on', 'out', 'outside', 'over', 'past', 'through', + 'to', 'toward', 'under', 'underneath', 'until', 'up', 'upon', 'with', 'within', + + # Conjunctions + 'and', 'but', 'or', 'nor', 'for', 'yet', 'so', + 'although', 'because', 'since', 'unless', + + # Articles + 'a', 'an', 'the', + + # Other common words + 'this', 'that', 'these', 'those', + 'what', 'which', 'who', 'whom', 'whose', + 'when', 'where', 'why', 'how', + 'all', 'any', 'both', 'each', 'few', 'more', 'most', 'other', 'some', 'such', + 'can', 'cannot', "can't", 'could', "couldn't", + 'may', 'might', 'must', "mustn't", + 'shall', 'should', "shouldn't", + 'will', "won't", 'would', "wouldn't", + 'not', "n't", 'no', 'nor', 'none' + } + + # Single comprehension, more efficient than multiple passes + return [token for token in tokens + if len(token) > 2 + and token not in noise + and token not in STOP_WORDS + and not token.startswith('↑') + and not token.startswith('▲') + and not token.startswith('⬆')] + + +def generate_content_hash(content: str) -> str: + """Generate a unique hash for content""" + return xxhash.xxh64(content.encode()).hexdigest() + # return hashlib.sha256(content.encode()).hexdigest() + +def ensure_content_dirs(base_path: str) -> Dict[str, str]: + """Create content directories if they don't exist""" + dirs = { + 'html': 'html_content', + 'cleaned': 'cleaned_html', + 'markdown': 'markdown_content', + 'extracted': 'extracted_content', + 'screenshots': 'screenshots' + } + + content_paths = {} + for key, dirname in dirs.items(): + path = os.path.join(base_path, dirname) + os.makedirs(path, exist_ok=True) + content_paths[key] = path + + return content_paths \ No newline at end of file diff --git a/crawl4ai/version_manager.py b/crawl4ai/version_manager.py new file mode 100644 index 0000000..8ae2de2 --- /dev/null +++ b/crawl4ai/version_manager.py @@ -0,0 +1,30 @@ +# version_manager.py +import os +from pathlib import Path +from packaging import version +from . 
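The new URL and hashing helpers added to `crawl4ai/utils.py` above are plain module-level functions, so they can be exercised on their own. A short sketch, assuming the package is installed with these additions; the URLs are arbitrary examples:

```python
# Illustrative calls to the helper functions added in crawl4ai/utils.py.
from crawl4ai.utils import normalize_url, is_external_url, generate_content_hash

print(normalize_url("/docs/intro", "https://example.com/blog/post"))
# -> https://example.com/docs/intro

print(is_external_url("https://other.org/page", "example.com"))
# -> True, since the URL's domain differs from the base domain

print(generate_content_hash("<html>example</html>"))
# -> short, stable xxhash64 hex digest of the content
```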
import __version__ + +class VersionManager: + def __init__(self): + self.home_dir = Path.home() / ".crawl4ai" + self.version_file = self.home_dir / "version.txt" + + def get_installed_version(self): + """Get the version recorded in home directory""" + if not self.version_file.exists(): + return None + try: + return version.parse(self.version_file.read_text().strip()) + except: + return None + + def update_version(self): + """Update the version file to current library version""" + self.version_file.write_text(__version__.__version__) + + def needs_update(self): + """Check if database needs update based on version""" + installed = self.get_installed_version() + current = version.parse(__version__.__version__) + return installed is None or installed < current + diff --git a/crawl4ai/web_crawler.back.py b/crawl4ai/web_crawler.back.py deleted file mode 100644 index af78f12..0000000 --- a/crawl4ai/web_crawler.back.py +++ /dev/null @@ -1,357 +0,0 @@ -import os, time -os.environ["TOKENIZERS_PARALLELISM"] = "false" -from pathlib import Path - -from .models import UrlModel, CrawlResult -from .database import init_db, get_cached_url, cache_url, DB_PATH, flush_db -from .utils import * -from .chunking_strategy import * -from .extraction_strategy import * -from .crawler_strategy import * -from typing import List -from concurrent.futures import ThreadPoolExecutor -from .config import * - - -class WebCrawler: - def __init__( - self, - # db_path: str = None, - crawler_strategy: CrawlerStrategy = None, - always_by_pass_cache: bool = False, - verbose: bool = False, - ): - # self.db_path = db_path - self.crawler_strategy = crawler_strategy or LocalSeleniumCrawlerStrategy(verbose=verbose) - self.always_by_pass_cache = always_by_pass_cache - - # Create the .crawl4ai folder in the user's home directory if it doesn't exist - self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") - os.makedirs(self.crawl4ai_folder, exist_ok=True) - os.makedirs(f"{self.crawl4ai_folder}/cache", exist_ok=True) - - # If db_path is not provided, use the default path - # if not db_path: - # self.db_path = f"{self.crawl4ai_folder}/crawl4ai.db" - - # flush_db() - init_db() - - self.ready = False - - def warmup(self): - print("[LOG] 🌤️ Warming up the WebCrawler") - result = self.run( - url='https://crawl4ai.uccode.io/', - word_count_threshold=5, - extraction_strategy= NoExtractionStrategy(), - bypass_cache=False, - verbose = False - ) - self.ready = True - print("[LOG] 🌞 WebCrawler is ready to crawl") - - def fetch_page( - self, - url_model: UrlModel, - provider: str = DEFAULT_PROVIDER, - api_token: str = None, - extract_blocks_flag: bool = True, - word_count_threshold=MIN_WORD_THRESHOLD, - css_selector: str = None, - screenshot: bool = False, - use_cached_html: bool = False, - extraction_strategy: ExtractionStrategy = None, - chunking_strategy: ChunkingStrategy = RegexChunking(), - **kwargs, - ) -> CrawlResult: - return self.run( - url_model.url, - word_count_threshold, - extraction_strategy or NoExtractionStrategy(), - chunking_strategy, - bypass_cache=url_model.forced, - css_selector=css_selector, - screenshot=screenshot, - **kwargs, - ) - pass - - def run_old( - self, - url: str, - word_count_threshold=MIN_WORD_THRESHOLD, - extraction_strategy: ExtractionStrategy = None, - chunking_strategy: ChunkingStrategy = RegexChunking(), - bypass_cache: bool = False, - css_selector: str = None, - screenshot: bool = False, - user_agent: str = None, - verbose=True, - **kwargs, - ) -> CrawlResult: - if user_agent: - 
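The new `VersionManager` above only reads and writes `~/.crawl4ai/version.txt`; how the library itself invokes it is not shown in this hunk, so the migration step below is a placeholder. A minimal sketch:

```python
# Minimal sketch of the VersionManager added in crawl4ai/version_manager.py.
from crawl4ai.version_manager import VersionManager

vm = VersionManager()
if vm.needs_update():
    # Placeholder: rebuild or migrate local ~/.crawl4ai state here, then record the version.
    vm.update_version()
print("installed version:", vm.get_installed_version())
```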
self.crawler_strategy.update_user_agent(user_agent) - extraction_strategy = extraction_strategy or NoExtractionStrategy() - extraction_strategy.verbose = verbose - # Check if extraction strategy is an instance of ExtractionStrategy if not raise an error - if not isinstance(extraction_strategy, ExtractionStrategy): - raise ValueError("Unsupported extraction strategy") - if not isinstance(chunking_strategy, ChunkingStrategy): - raise ValueError("Unsupported chunking strategy") - - # make sure word_count_threshold is not lesser than MIN_WORD_THRESHOLD - if word_count_threshold < MIN_WORD_THRESHOLD: - word_count_threshold = MIN_WORD_THRESHOLD - - # Check cache first - if not bypass_cache and not self.always_by_pass_cache: - cached = get_cached_url(url) - if cached: - return CrawlResult( - **{ - "url": cached[0], - "html": cached[1], - "cleaned_html": cached[2], - "markdown": cached[3], - "extracted_content": cached[4], - "success": cached[5], - "media": json.loads(cached[6] or "{}"), - "links": json.loads(cached[7] or "{}"), - "metadata": json.loads(cached[8] or "{}"), # "metadata": "{} - "screenshot": cached[9], - "error_message": "", - } - ) - - # Initialize WebDriver for crawling - t = time.time() - if kwargs.get("js", None): - self.crawler_strategy.js_code = kwargs.get("js") - html = self.crawler_strategy.crawl(url) - base64_image = None - if screenshot: - base64_image = self.crawler_strategy.take_screenshot() - success = True - error_message = "" - # Extract content from HTML - try: - result = get_content_of_website(url, html, word_count_threshold, css_selector=css_selector) - metadata = extract_metadata(html) - if result is None: - raise ValueError(f"Failed to extract content from the website: {url}") - except InvalidCSSSelectorError as e: - raise ValueError(str(e)) - - cleaned_html = result.get("cleaned_html", "") - markdown = result.get("markdown", "") - media = result.get("media", []) - links = result.get("links", []) - - # Print a profession LOG style message, show time taken and say crawling is done - if verbose: - print( - f"[LOG] 🚀 Crawling done for {url}, success: {success}, time taken: {time.time() - t} seconds" - ) - - extracted_content = [] - if verbose: - print(f"[LOG] 🔥 Extracting semantic blocks for {url}, Strategy: {extraction_strategy.name}") - t = time.time() - # Split markdown into sections - sections = chunking_strategy.chunk(markdown) - # sections = merge_chunks_based_on_token_threshold(sections, CHUNK_TOKEN_THRESHOLD) - - extracted_content = extraction_strategy.run( - url, sections, - ) - extracted_content = json.dumps(extracted_content) - - if verbose: - print( - f"[LOG] 🚀 Extraction done for {url}, time taken: {time.time() - t} seconds." 
- ) - - # Cache the result - cleaned_html = beautify_html(cleaned_html) - cache_url( - url, - html, - cleaned_html, - markdown, - extracted_content, - success, - json.dumps(media), - json.dumps(links), - json.dumps(metadata), - screenshot=base64_image, - ) - - return CrawlResult( - url=url, - html=html, - cleaned_html=cleaned_html, - markdown=markdown, - media=media, - links=links, - metadata=metadata, - screenshot=base64_image, - extracted_content=extracted_content, - success=success, - error_message=error_message, - ) - - def fetch_pages( - self, - url_models: List[UrlModel], - provider: str = DEFAULT_PROVIDER, - api_token: str = None, - extract_blocks_flag: bool = True, - word_count_threshold=MIN_WORD_THRESHOLD, - use_cached_html: bool = False, - css_selector: str = None, - screenshot: bool = False, - extraction_strategy: ExtractionStrategy = None, - chunking_strategy: ChunkingStrategy = RegexChunking(), - **kwargs, - ) -> List[CrawlResult]: - extraction_strategy = extraction_strategy or NoExtractionStrategy() - def fetch_page_wrapper(url_model, *args, **kwargs): - return self.fetch_page(url_model, *args, **kwargs) - - with ThreadPoolExecutor() as executor: - results = list( - executor.map( - fetch_page_wrapper, - url_models, - [provider] * len(url_models), - [api_token] * len(url_models), - [extract_blocks_flag] * len(url_models), - [word_count_threshold] * len(url_models), - [css_selector] * len(url_models), - [screenshot] * len(url_models), - [use_cached_html] * len(url_models), - [extraction_strategy] * len(url_models), - [chunking_strategy] * len(url_models), - *[kwargs] * len(url_models), - ) - ) - - return results - - def run( - self, - url: str, - word_count_threshold=MIN_WORD_THRESHOLD, - extraction_strategy: ExtractionStrategy = None, - chunking_strategy: ChunkingStrategy = RegexChunking(), - bypass_cache: bool = False, - css_selector: str = None, - screenshot: bool = False, - user_agent: str = None, - verbose=True, - **kwargs, - ) -> CrawlResult: - extraction_strategy = extraction_strategy or NoExtractionStrategy() - extraction_strategy.verbose = verbose - if not isinstance(extraction_strategy, ExtractionStrategy): - raise ValueError("Unsupported extraction strategy") - if not isinstance(chunking_strategy, ChunkingStrategy): - raise ValueError("Unsupported chunking strategy") - - if word_count_threshold < MIN_WORD_THRESHOLD: - word_count_threshold = MIN_WORD_THRESHOLD - - # Check cache first - cached = None - extracted_content = None - if not bypass_cache and not self.always_by_pass_cache: - cached = get_cached_url(url) - - if cached: - html = cached[1] - extracted_content = cached[2] - if screenshot: - screenshot = cached[9] - - else: - if user_agent: - self.crawler_strategy.update_user_agent(user_agent) - html = self.crawler_strategy.crawl(url) - if screenshot: - screenshot = self.crawler_strategy.take_screenshot() - - return self.process_html(url, html, extracted_content, word_count_threshold, extraction_strategy, chunking_strategy, css_selector, screenshot, verbose, bool(cached), **kwargs) - - def process_html( - self, - url: str, - html: str, - extracted_content: str, - word_count_threshold: int, - extraction_strategy: ExtractionStrategy, - chunking_strategy: ChunkingStrategy, - css_selector: str, - screenshot: bool, - verbose: bool, - is_cached: bool, - **kwargs, - ) -> CrawlResult: - t = time.time() - # Extract content from HTML - try: - result = get_content_of_website(url, html, word_count_threshold, css_selector=css_selector) - metadata = extract_metadata(html) - if 
result is None: - raise ValueError(f"Failed to extract content from the website: {url}") - except InvalidCSSSelectorError as e: - raise ValueError(str(e)) - - cleaned_html = result.get("cleaned_html", "") - markdown = result.get("markdown", "") - media = result.get("media", []) - links = result.get("links", []) - - if verbose: - print(f"[LOG] 🚀 Crawling done for {url}, success: True, time taken: {time.time() - t} seconds") - - if extracted_content is None: - if verbose: - print(f"[LOG] 🔥 Extracting semantic blocks for {url}, Strategy: {extraction_strategy.name}") - - sections = chunking_strategy.chunk(markdown) - extracted_content = extraction_strategy.run(url, sections) - extracted_content = json.dumps(extracted_content) - - if verbose: - print(f"[LOG] 🚀 Extraction done for {url}, time taken: {time.time() - t} seconds.") - - screenshot = None if not screenshot else screenshot - - if not is_cached: - cache_url( - url, - html, - cleaned_html, - markdown, - extracted_content, - True, - json.dumps(media), - json.dumps(links), - json.dumps(metadata), - screenshot=screenshot, - ) - - return CrawlResult( - url=url, - html=html, - cleaned_html=cleaned_html, - markdown=markdown, - media=media, - links=links, - metadata=metadata, - screenshot=screenshot, - extracted_content=extracted_content, - success=True, - error_message="", - ) \ No newline at end of file diff --git a/crawl4ai/web_crawler.py b/crawl4ai/web_crawler.py index 20e9b04..a32a988 100644 --- a/crawl4ai/web_crawler.py +++ b/crawl4ai/web_crawler.py @@ -10,6 +10,7 @@ from .crawler_strategy import * from typing import List from concurrent.futures import ThreadPoolExecutor +from .content_scraping_strategy import WebScrapingStrategy from .config import * import warnings import json @@ -20,7 +21,7 @@ class WebCrawler: def __init__(self, crawler_strategy: CrawlerStrategy = None, always_by_pass_cache: bool = False, verbose: bool = False): self.crawler_strategy = crawler_strategy or LocalSeleniumCrawlerStrategy(verbose=verbose) self.always_by_pass_cache = always_by_pass_cache - self.crawl4ai_folder = os.path.join(Path.home(), ".crawl4ai") + self.crawl4ai_folder = os.path.join(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()), ".crawl4ai") os.makedirs(self.crawl4ai_folder, exist_ok=True) os.makedirs(f"{self.crawl4ai_folder}/cache", exist_ok=True) init_db() @@ -181,7 +182,21 @@ def process_html( # Extract content from HTML try: t1 = time.time() - result = get_content_of_website_optimized(url, html, word_count_threshold, css_selector=css_selector, only_text=kwargs.get("only_text", False)) + scrapping_strategy = WebScrapingStrategy() + extra_params = {k: v for k, v in kwargs.items() if k not in ["only_text", "image_description_min_word_threshold"]} + result = scrapping_strategy.scrap( + url, + html, + word_count_threshold=word_count_threshold, + css_selector=css_selector, + only_text=kwargs.get("only_text", False), + image_description_min_word_threshold=kwargs.get( + "image_description_min_word_threshold", IMAGE_DESCRIPTION_MIN_WORD_THRESHOLD + ), + **extra_params, + ) + + # result = get_content_of_website_optimized(url, html, word_count_threshold, css_selector=css_selector, only_text=kwargs.get("only_text", False)) if verbose: print(f"[LOG] 🚀 Content extracted for {url}, success: True, time taken: {time.time() - t1:.2f} seconds") diff --git a/deploy/railway/README.md b/deploy/railway/README.md new file mode 100644 index 0000000..155e764 --- /dev/null +++ b/deploy/railway/README.md @@ -0,0 +1,19 @@ +# Railway Deployment + +## Quick Deploy 
+[![Deploy on Railway](https://railway.app/button.svg)](https://railway.app/template/crawl4ai) + +## Manual Setup +1. Fork this repository +2. Create a new Railway project +3. Configure environment variables: + - `INSTALL_TYPE`: basic or all + - `ENABLE_GPU`: true/false +4. Deploy! + +## Configuration +See `railway.toml` for: +- Memory limits +- Health checks +- Restart policies +- Scaling options \ No newline at end of file diff --git a/deploy/railway/button.json b/deploy/railway/button.json new file mode 100644 index 0000000..1fc5216 --- /dev/null +++ b/deploy/railway/button.json @@ -0,0 +1,33 @@ +{ + "name": "Crawl4AI", + "description": "LLM Friendly Web Crawler & Scraper", + "render": { + "dockerfile": { + "path": "Dockerfile" + } + }, + "env": [ + { + "key": "INSTALL_TYPE", + "description": "Installation type (basic/all)", + "default": "basic", + "required": true + }, + { + "key": "ENABLE_GPU", + "description": "Enable GPU support", + "default": "false", + "required": false + } + ], + "services": [ + { + "name": "web", + "dockerfile": "./Dockerfile", + "healthcheck": { + "path": "/health", + "port": 11235 + } + } + ] + } \ No newline at end of file diff --git a/deploy/railway/railway.toml b/deploy/railway/railway.toml new file mode 100644 index 0000000..f24d8fa --- /dev/null +++ b/deploy/railway/railway.toml @@ -0,0 +1,18 @@ +# railway.toml +[build] +builder = "DOCKERFILE" +dockerfilePath = "Dockerfile" + +[deploy] +startCommand = "uvicorn main:app --host 0.0.0.0 --port $PORT" +healthcheckPath = "/health" +restartPolicyType = "ON_FAILURE" +restartPolicyMaxRetries = 3 + +[deploy.memory] +soft = 2048 # 2GB min for Playwright +hard = 4096 # 4GB max + +[deploy.scaling] +min = 1 +max = 1 diff --git a/docker-compose.hub.yml b/docker-compose.hub.yml new file mode 100644 index 0000000..9bcfa98 --- /dev/null +++ b/docker-compose.hub.yml @@ -0,0 +1,27 @@ +services: + crawl4ai: + image: unclecode/crawl4ai:basic # Pull image from Docker Hub + ports: + - "11235:11235" # FastAPI server + - "8000:8000" # Alternative port + - "9222:9222" # Browser debugging + - "8080:8080" # Additional port + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} # Optional API token + - OPENAI_API_KEY=${OPENAI_API_KEY:-} # Optional OpenAI API key + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} # Optional Claude API key + volumes: + - /dev/shm:/dev/shm # Shared memory for browser operations + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 1G + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11235/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s diff --git a/docker-compose.local.yml b/docker-compose.local.yml new file mode 100644 index 0000000..7dc41b4 --- /dev/null +++ b/docker-compose.local.yml @@ -0,0 +1,33 @@ +services: + crawl4ai: + build: + context: . 
+ dockerfile: Dockerfile + args: + PYTHON_VERSION: 3.10 + INSTALL_TYPE: all + ENABLE_GPU: false + ports: + - "11235:11235" # FastAPI server + - "8000:8000" # Alternative port + - "9222:9222" # Browser debugging + - "8080:8080" # Additional port + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} # Optional API token + - OPENAI_API_KEY=${OPENAI_API_KEY:-} # Optional OpenAI API key + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} # Optional Claude API key + volumes: + - /dev/shm:/dev/shm # Shared memory for browser operations + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 1G + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11235/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..1097ef1 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,62 @@ +services: + crawl4ai: + build: + context: . + dockerfile: Dockerfile + args: + PYTHON_VERSION: 3.10 + INSTALL_TYPE: all + ENABLE_GPU: false + profiles: ["local"] + ports: + - "11235:11235" + - "8000:8000" + - "9222:9222" + - "8080:8080" + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} + volumes: + - /dev/shm:/dev/shm + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 1G + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11235/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s + + crawl4ai-hub: + image: unclecode/crawl4ai:basic + profiles: ["hub"] + ports: + - "11235:11235" + - "8000:8000" + - "9222:9222" + - "8080:8080" + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} + volumes: + - /dev/shm:/dev/shm + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 1G + restart: unless-stopped + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:11235/health"] + interval: 30s + timeout: 10s + retries: 3 + start_period: 40s diff --git a/docs/.DS_Store b/docs/.DS_Store deleted file mode 100644 index 2502f79..0000000 Binary files a/docs/.DS_Store and /dev/null differ diff --git a/docs/assets/pitch-dark.png b/docs/assets/pitch-dark.png new file mode 100644 index 0000000..9b9b37b Binary files /dev/null and b/docs/assets/pitch-dark.png differ diff --git a/docs/assets/pitch-dark.svg b/docs/assets/pitch-dark.svg new file mode 100644 index 0000000..0913b2d --- /dev/null +++ b/docs/assets/pitch-dark.svg @@ -0,0 +1,64 @@ + + + + + + + + + Data Capitalization Opportunity + + Transform digital footprints into assets + Personal data as capital + Enterprise knowledge valuation + New form of wealth creation + + + + + Authentic Data Potential + + Vast reservoir of real insights + Enhanced AI development + Diverse human knowledge + Willing participation model + + + + + + + + 1. Open-Source Foundation + Data extraction engine & community development + + + + 2. Data Capitalization Platform + Tools to structure & value digital assets + + + + 3. 
Shared Data Marketplace + Economic platform for data exchange + + + + + + + + + + + + + + + + + + + Economic Vision: Shared Data Economy + + \ No newline at end of file diff --git a/docs/chunking_strategies.json b/docs/chunking_strategies.json deleted file mode 100644 index b0d2a6b..0000000 --- a/docs/chunking_strategies.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "RegexChunking": "### RegexChunking\n\n`RegexChunking` is a text chunking strategy that splits a given text into smaller parts using regular expressions.\nThis is useful for preparing large texts for processing by language models, ensuring they are divided into manageable segments.\n\n#### Constructor Parameters:\n- `patterns` (list, optional): A list of regular expression patterns used to split the text. Default is to split by double newlines (`['\\n\\n']`).\n\n#### Example usage:\n```python\nchunker = RegexChunking(patterns=[r'\\n\\n', r'\\. '])\nchunks = chunker.chunk(\"This is a sample text. It will be split into chunks.\")\n```", - - "NlpSentenceChunking": "### NlpSentenceChunking\n\n`NlpSentenceChunking` uses a natural language processing model to chunk a given text into sentences. This approach leverages SpaCy to accurately split text based on sentence boundaries.\n\n#### Constructor Parameters:\n- None.\n\n#### Example usage:\n```python\nchunker = NlpSentenceChunking()\nchunks = chunker.chunk(\"This is a sample text. It will be split into sentences.\")\n```", - - "TopicSegmentationChunking": "### TopicSegmentationChunking\n\n`TopicSegmentationChunking` uses the TextTiling algorithm to segment a given text into topic-based chunks. This method identifies thematic boundaries in the text.\n\n#### Constructor Parameters:\n- `num_keywords` (int, optional): The number of keywords to extract for each topic segment. Default is `3`.\n\n#### Example usage:\n```python\nchunker = TopicSegmentationChunking(num_keywords=3)\nchunks = chunker.chunk(\"This is a sample text. It will be split into topic-based segments.\")\n```", - - "FixedLengthWordChunking": "### FixedLengthWordChunking\n\n`FixedLengthWordChunking` splits a given text into chunks of fixed length, based on the number of words.\n\n#### Constructor Parameters:\n- `chunk_size` (int, optional): The number of words in each chunk. Default is `100`.\n\n#### Example usage:\n```python\nchunker = FixedLengthWordChunking(chunk_size=100)\nchunks = chunker.chunk(\"This is a sample text. It will be split into fixed-length word chunks.\")\n```", - - "SlidingWindowChunking": "### SlidingWindowChunking\n\n`SlidingWindowChunking` uses a sliding window approach to chunk a given text. Each chunk has a fixed length, and the window slides by a specified step size.\n\n#### Constructor Parameters:\n- `window_size` (int, optional): The number of words in each chunk. Default is `100`.\n- `step` (int, optional): The number of words to slide the window. Default is `50`.\n\n#### Example usage:\n```python\nchunker = SlidingWindowChunking(window_size=100, step=50)\nchunks = chunker.chunk(\"This is a sample text. 
It will be split using a sliding window approach.\")\n```" - } - \ No newline at end of file diff --git a/docs/examples/docker_example.py b/docs/examples/docker_example.py new file mode 100644 index 0000000..17ef9f0 --- /dev/null +++ b/docs/examples/docker_example.py @@ -0,0 +1,357 @@ +import requests +import json +import time +import sys +import base64 +import os +from typing import Dict, Any + +class Crawl4AiTester: + def __init__(self, base_url: str = "http://localhost:11235", api_token: str = None): + self.base_url = base_url + self.api_token = api_token or os.getenv('CRAWL4AI_API_TOKEN') or "test_api_code" # Check environment variable as fallback + self.headers = {'Authorization': f'Bearer {self.api_token}'} if self.api_token else {} + + def submit_and_wait(self, request_data: Dict[str, Any], timeout: int = 300) -> Dict[str, Any]: + # Submit crawl job + response = requests.post(f"{self.base_url}/crawl", json=request_data, headers=self.headers) + if response.status_code == 403: + raise Exception("API token is invalid or missing") + task_id = response.json()["task_id"] + print(f"Task ID: {task_id}") + + # Poll for result + start_time = time.time() + while True: + if time.time() - start_time > timeout: + raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") + + result = requests.get(f"{self.base_url}/task/{task_id}", headers=self.headers) + status = result.json() + + if status["status"] == "failed": + print("Task failed:", status.get("error")) + raise Exception(f"Task failed: {status.get('error')}") + + if status["status"] == "completed": + return status + + time.sleep(2) + + def submit_sync(self, request_data: Dict[str, Any]) -> Dict[str, Any]: + response = requests.post(f"{self.base_url}/crawl_sync", json=request_data, headers=self.headers, timeout=60) + if response.status_code == 408: + raise TimeoutError("Task did not complete within server timeout") + response.raise_for_status() + return response.json() + + def crawl_direct(self, request_data: Dict[str, Any]) -> Dict[str, Any]: + """Directly crawl without using task queue""" + response = requests.post( + f"{self.base_url}/crawl_direct", + json=request_data, + headers=self.headers + ) + response.raise_for_status() + return response.json() + +def test_docker_deployment(version="basic"): + tester = Crawl4AiTester( + base_url="http://localhost:11235" , + # base_url="https://api.crawl4ai.com" # just for example + # api_token="test" # just for example + ) + print(f"Testing Crawl4AI Docker {version} version") + + # Health check with timeout and retry + max_retries = 5 + for i in range(max_retries): + try: + health = requests.get(f"{tester.base_url}/health", timeout=10) + print("Health check:", health.json()) + break + except requests.exceptions.RequestException as e: + if i == max_retries - 1: + print(f"Failed to connect after {max_retries} attempts") + sys.exit(1) + print(f"Waiting for service to start (attempt {i+1}/{max_retries})...") + time.sleep(5) + + # Test cases based on version + # test_basic_crawl(tester) + # test_basic_crawl(tester) + # test_basic_crawl_sync(tester) + test_basic_crawl_direct(tester) + + # if version in ["full", "transformer"]: + # test_cosine_extraction(tester) + + # test_js_execution(tester) + # test_css_selector(tester) + # test_structured_extraction(tester) + # test_llm_extraction(tester) + # test_llm_with_ollama(tester) + # test_screenshot(tester) + + +def test_basic_crawl(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl ===") + request = { + "urls": 
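Outside the test harness in `docs/examples/docker_example.py`, the same `/crawl_direct` endpoint can be called with plain `requests`. A minimal sketch that mirrors the `crawl_direct` helper above; the host, port, and token are the defaults assumed by this example file, not properties of every deployment:

```python
# Minimal client sketch for the /crawl_direct endpoint exercised above.
# Host, port, and token mirror the defaults used in docker_example.py.
import requests

resp = requests.post(
    "http://localhost:11235/crawl_direct",
    headers={"Authorization": "Bearer test_api_code"},
    json={"urls": "https://example.com", "cache_mode": "bypass"},
)
resp.raise_for_status()
print(resp.json()["result"]["markdown"][:200])
```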
"https://www.nbcnews.com/business", + "priority": 10, + "session_id": "test" + } + + result = tester.submit_and_wait(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + assert len(result["result"]["markdown"]) > 0 + +def test_basic_crawl_sync(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl (Sync) ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10, + "session_id": "test" + } + + result = tester.submit_sync(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result['status'] == 'completed' + assert result['result']['success'] + assert len(result['result']['markdown']) > 0 + +def test_basic_crawl_direct(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl (Direct) ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10, + # "session_id": "test" + "cache_mode": "bypass" # or "enabled", "disabled", "read_only", "write_only" + } + + result = tester.crawl_direct(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result['result']['success'] + assert len(result['result']['markdown']) > 0 + +def test_js_execution(tester: Crawl4AiTester): + print("\n=== Testing JS Execution ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "js_code": [ + "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();" + ], + "wait_for": "article.tease-card:nth-child(10)", + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print(f"JS execution result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_css_selector(tester: Crawl4AiTester): + print("\n=== Testing CSS Selector ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 7, + "css_selector": ".wide-tease-item__description", + "crawler_params": { + "headless": True + }, + "extra": {"word_count_threshold": 10} + + } + + result = tester.submit_and_wait(request) + print(f"CSS selector result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_structured_extraction(tester: Crawl4AiTester): + print("\n=== Testing Structured Extraction ===") + schema = { + "name": "Coinbase Crypto Prices", + "baseSelector": ".cds-tableRow-t45thuk", + "fields": [ + { + "name": "crypto", + "selector": "td:nth-child(1) h2", + "type": "text", + }, + { + "name": "symbol", + "selector": "td:nth-child(1) p", + "type": "text", + }, + { + "name": "price", + "selector": "td:nth-child(2)", + "type": "text", + } + ], + } + + request = { + "urls": "https://www.coinbase.com/explore", + "priority": 9, + "extraction_config": { + "type": "json_css", + "params": { + "schema": schema + } + } + } + + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} items") + print("Sample item:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + assert len(extracted) > 0 + +def test_llm_extraction(tester: Crawl4AiTester): + print("\n=== Testing LLM Extraction ===") + schema = { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "description": "Name of the OpenAI model." 
+ }, + "input_fee": { + "type": "string", + "description": "Fee for input token for the OpenAI model." + }, + "output_fee": { + "type": "string", + "description": "Fee for output token for the OpenAI model." + } + }, + "required": ["model_name", "input_fee", "output_fee"] + } + + request = { + "urls": "https://openai.com/api/pricing", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "openai/gpt-4o-mini", + "api_token": os.getenv("OPENAI_API_KEY"), + "schema": schema, + "extraction_type": "schema", + "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""" + } + }, + "crawler_params": {"word_count_threshold": 1} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} model pricing entries") + print("Sample entry:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"LLM extraction test failed (might be due to missing API key): {str(e)}") + +def test_llm_with_ollama(tester: Crawl4AiTester): + print("\n=== Testing LLM with Ollama ===") + schema = { + "type": "object", + "properties": { + "article_title": { + "type": "string", + "description": "The main title of the news article" + }, + "summary": { + "type": "string", + "description": "A brief summary of the article content" + }, + "main_topics": { + "type": "array", + "items": {"type": "string"}, + "description": "Main topics or themes discussed in the article" + } + } + } + + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "ollama/llama2", + "schema": schema, + "extraction_type": "schema", + "instruction": "Extract the main article information including title, summary, and main topics." 
+ } + }, + "extra": {"word_count_threshold": 1}, + "crawler_params": {"verbose": True} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print("Extracted content:", json.dumps(extracted, indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"Ollama extraction test failed: {str(e)}") + +def test_cosine_extraction(tester: Crawl4AiTester): + print("\n=== Testing Cosine Extraction ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "cosine", + "params": { + "semantic_filter": "business finance economy", + "word_count_threshold": 10, + "max_dist": 0.2, + "top_k": 3 + } + } + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} text clusters") + print("First cluster tags:", extracted[0]["tags"]) + assert result["result"]["success"] + except Exception as e: + print(f"Cosine extraction test failed: {str(e)}") + +def test_screenshot(tester: Crawl4AiTester): + print("\n=== Testing Screenshot ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 5, + "screenshot": True, + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print("Screenshot captured:", bool(result["result"]["screenshot"])) + + if result["result"]["screenshot"]: + # Save screenshot + screenshot_data = base64.b64decode(result["result"]["screenshot"]) + with open("test_screenshot.jpg", "wb") as f: + f.write(screenshot_data) + print("Screenshot saved as test_screenshot.jpg") + + assert result["result"]["success"] + +if __name__ == "__main__": + version = sys.argv[1] if len(sys.argv) > 1 else "basic" + # version = "full" + test_docker_deployment(version) \ No newline at end of file diff --git a/docs/examples/quickstart.ipynb b/docs/examples/quickstart.ipynb index 09ad623..4751dec 100644 --- a/docs/examples/quickstart.ipynb +++ b/docs/examples/quickstart.ipynb @@ -1,736 +1,664 @@ { - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "6yLvrXn7yZQI" - }, - "source": [ - "# Crawl4AI: Advanced Web Crawling and Data Extraction\n", - "\n", - "Welcome to this interactive notebook showcasing Crawl4AI, an advanced asynchronous web crawling and data extraction library.\n", - "\n", - "- GitHub Repository: [https://github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)\n", - "- Twitter: [@unclecode](https://twitter.com/unclecode)\n", - "- Website: [https://crawl4ai.com](https://crawl4ai.com)\n", - "\n", - "Let's explore the powerful features of Crawl4AI!" 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KIn_9nxFyZQK" - }, - "source": [ - "## Installation\n", - "\n", - "First, let's install Crawl4AI from GitHub:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "mSnaxLf3zMog" - }, - "outputs": [], - "source": [ - "!sudo apt-get update && sudo apt-get install -y libwoff1 libopus0 libwebp6 libwebpdemux2 libenchant1c2a libgudev-1.0-0 libsecret-1-0 libhyphen0 libgdk-pixbuf2.0-0 libegl1 libnotify4 libxslt1.1 libevent-2.1-7 libgles2 libvpx6 libxcomposite1 libatk1.0-0 libatk-bridge2.0-0 libepoxy0 libgtk-3-0 libharfbuzz-icu0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "xlXqaRtayZQK" - }, - "outputs": [], - "source": [ - "# !pip install \"crawl4ai @ git+https://github.com/unclecode/crawl4ai.git\"\n", - "!pip install \"crawl4ai @ git+https://github.com/unclecode/crawl4ai.git@staging\"\n", - "!pip install nest-asyncio\n", - "!playwright install" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qKCE7TI7yZQL" - }, - "source": [ - "Now, let's import the necessary libraries:" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": { - "id": "I67tr7aAyZQL" - }, - "outputs": [], - "source": [ - "import asyncio\n", - "import nest_asyncio\n", - "from crawl4ai import AsyncWebCrawler\n", - "from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy\n", - "import json\n", - "import time\n", - "from pydantic import BaseModel, Field\n", - "\n", - "nest_asyncio.apply()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "h7yR_Rt_yZQM" - }, - "source": [ - "## Basic Usage\n", - "\n", - "Let's start with a simple crawl example:" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "yBh6hf4WyZQM", - "outputId": "0f83af5c-abba-4175-ed95-70b7512e6bcc" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", - "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", - "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.05 seconds\n", - "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.05 seconds.\n", - "18102\n" - ] - } - ], - "source": [ - "async def simple_crawl():\n", - " async with AsyncWebCrawler(verbose=True) as crawler:\n", - " result = await crawler.arun(url=\"https://www.nbcnews.com/business\")\n", - " print(len(result.markdown))\n", - "await simple_crawl()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "9rtkgHI28uI4" - }, - "source": [ - "💡 By default, **Crawl4AI** caches the result of every URL, so the next time you call it, you’ll get an instant result. But if you want to bypass the cache, just set `bypass_cache=True`." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "MzZ0zlJ9yZQM" - }, - "source": [ - "## Advanced Features\n", - "\n", - "### Executing JavaScript and Using CSS Selectors" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "gHStF86xyZQM", - "outputId": "34d0fb6d-4dec-4677-f76e-85a1f082829b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", - "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", - "[LOG] 🕸️ Crawling https://www.nbcnews.com/business using AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://www.nbcnews.com/business successfully!\n", - "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 6.06 seconds\n", - "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.10 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", - "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.11 seconds.\n", - "41135\n" - ] - } - ], - "source": [ - "async def js_and_css():\n", - " async with AsyncWebCrawler(verbose=True) as crawler:\n", - " js_code = [\"const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();\"]\n", - " result = await crawler.arun(\n", - " url=\"https://www.nbcnews.com/business\",\n", - " js_code=js_code,\n", - " # css_selector=\"YOUR_CSS_SELECTOR_HERE\",\n", - " bypass_cache=True\n", - " )\n", - " print(len(result.markdown))\n", - "\n", - "await js_and_css()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cqE_W4coyZQM" - }, - "source": [ - "### Using a Proxy\n", - "\n", - "Note: You'll need to replace the proxy URL with a working proxy for this example to run successfully." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "QjAyiAGqyZQM" - }, - "outputs": [], - "source": [ - "async def use_proxy():\n", - " async with AsyncWebCrawler(verbose=True, proxy=\"http://your-proxy-url:port\") as crawler:\n", - " result = await crawler.arun(\n", - " url=\"https://www.nbcnews.com/business\",\n", - " bypass_cache=True\n", - " )\n", - " print(result.markdown[:500]) # Print first 500 characters\n", - "\n", - "# Uncomment the following line to run the proxy example\n", - "# await use_proxy()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "XTZ88lbayZQN" - }, - "source": [ - "### Extracting Structured Data with OpenAI\n", - "\n", - "Note: You'll need to set your OpenAI API key as an environment variable for this example to work." 
- ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "fIOlDayYyZQN", - "outputId": "cb8359cc-dee0-4762-9698-5dfdcee055b8" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", - "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", - "[LOG] 🕸️ Crawling https://openai.com/api/pricing/ using AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://openai.com/api/pricing/ successfully!\n", - "[LOG] 🚀 Crawling done for https://openai.com/api/pricing/, success: True, time taken: 3.77 seconds\n", - "[LOG] 🚀 Content extracted for https://openai.com/api/pricing/, success: True, time taken: 0.21 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://openai.com/api/pricing/, Strategy: AsyncWebCrawler\n", - "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 0\n", - "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 1\n", - "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 2\n", - "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 3\n", - "[LOG] Extracted 4 blocks from URL: https://openai.com/api/pricing/ block index: 3\n", - "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 4\n", - "[LOG] Extracted 5 blocks from URL: https://openai.com/api/pricing/ block index: 0\n", - "[LOG] Extracted 1 blocks from URL: https://openai.com/api/pricing/ block index: 4\n", - "[LOG] Extracted 8 blocks from URL: https://openai.com/api/pricing/ block index: 1\n", - "[LOG] Extracted 12 blocks from URL: https://openai.com/api/pricing/ block index: 2\n", - "[LOG] 🚀 Extraction done for https://openai.com/api/pricing/, time taken: 8.55 seconds.\n", - "5029\n" - ] - } - ], - "source": [ - "import os\n", - "from google.colab import userdata\n", - "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n", - "\n", - "class OpenAIModelFee(BaseModel):\n", - " model_name: str = Field(..., description=\"Name of the OpenAI model.\")\n", - " input_fee: str = Field(..., description=\"Fee for input token for the OpenAI model.\")\n", - " output_fee: str = Field(..., description=\"Fee for output token for the OpenAI model.\")\n", - "\n", - "async def extract_openai_fees():\n", - " async with AsyncWebCrawler(verbose=True) as crawler:\n", - " result = await crawler.arun(\n", - " url='https://openai.com/api/pricing/',\n", - " word_count_threshold=1,\n", - " extraction_strategy=LLMExtractionStrategy(\n", - " provider=\"openai/gpt-4o\", api_token=os.getenv('OPENAI_API_KEY'),\n", - " schema=OpenAIModelFee.schema(),\n", - " extraction_type=\"schema\",\n", - " instruction=\"\"\"From the crawled content, extract all mentioned model names along with their fees for input and output tokens.\n", - " Do not miss any models in the entire content. 
One extracted model JSON format should look like this:\n", - " {\"model_name\": \"GPT-4\", \"input_fee\": \"US$10.00 / 1M tokens\", \"output_fee\": \"US$30.00 / 1M tokens\"}.\"\"\"\n", - " ),\n", - " bypass_cache=True,\n", - " )\n", - " print(len(result.extracted_content))\n", - "\n", - "# Uncomment the following line to run the OpenAI extraction example\n", - "await extract_openai_fees()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "BypA5YxEyZQN" - }, - "source": [ - "### Advanced Multi-Page Crawling with JavaScript Execution" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tfkcVQ0b7mw-" - }, - "source": [ - "## Advanced Multi-Page Crawling with JavaScript Execution\n", - "\n", - "This example demonstrates Crawl4AI's ability to handle complex crawling scenarios, specifically extracting commits from multiple pages of a GitHub repository. The challenge here is that clicking the \"Next\" button doesn't load a new page, but instead uses asynchronous JavaScript to update the content. This is a common hurdle in modern web crawling.\n", - "\n", - "To overcome this, we use Crawl4AI's custom JavaScript execution to simulate clicking the \"Next\" button, and implement a custom hook to detect when new data has loaded. Our strategy involves comparing the first commit's text before and after \"clicking\" Next, waiting until it changes to confirm new data has rendered. This showcases Crawl4AI's flexibility in handling dynamic content and its ability to implement custom logic for even the most challenging crawling tasks." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "qUBKGpn3yZQN", - "outputId": "3e555b6a-ed33-42f4-cce9-499a923fbe17" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", - "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", - "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", - "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 5.16 seconds\n", - "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.28 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", - "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.28 seconds.\n", - "Page 1: Found 35 commits\n", - "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", - "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.78 seconds\n", - "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.90 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", - "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.90 seconds.\n", - "Page 2: Found 35 commits\n", - "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using 
AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", - "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 2.00 seconds\n", - "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.74 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", - "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.75 seconds.\n", - "Page 3: Found 35 commits\n", - "Successfully crawled 105 commits across 3 pages\n" - ] - } - ], - "source": [ - "import re\n", - "from bs4 import BeautifulSoup\n", - "\n", - "async def crawl_typescript_commits():\n", - " first_commit = \"\"\n", - " async def on_execution_started(page):\n", - " nonlocal first_commit\n", - " try:\n", - " while True:\n", - " await page.wait_for_selector('li.Box-sc-g0xbh4-0 h4')\n", - " commit = await page.query_selector('li.Box-sc-g0xbh4-0 h4')\n", - " commit = await commit.evaluate('(element) => element.textContent')\n", - " commit = re.sub(r'\\s+', '', commit)\n", - " if commit and commit != first_commit:\n", - " first_commit = commit\n", - " break\n", - " await asyncio.sleep(0.5)\n", - " except Exception as e:\n", - " print(f\"Warning: New content didn't appear after JavaScript execution: {e}\")\n", - "\n", - " async with AsyncWebCrawler(verbose=True) as crawler:\n", - " crawler.crawler_strategy.set_hook('on_execution_started', on_execution_started)\n", - "\n", - " url = \"https://github.com/microsoft/TypeScript/commits/main\"\n", - " session_id = \"typescript_commits_session\"\n", - " all_commits = []\n", - "\n", - " js_next_page = \"\"\"\n", - " const button = document.querySelector('a[data-testid=\"pagination-next-button\"]');\n", - " if (button) button.click();\n", - " \"\"\"\n", - "\n", - " for page in range(3): # Crawl 3 pages\n", - " result = await crawler.arun(\n", - " url=url,\n", - " session_id=session_id,\n", - " css_selector=\"li.Box-sc-g0xbh4-0\",\n", - " js=js_next_page if page > 0 else None,\n", - " bypass_cache=True,\n", - " js_only=page > 0\n", - " )\n", - "\n", - " assert result.success, f\"Failed to crawl page {page + 1}\"\n", - "\n", - " soup = BeautifulSoup(result.cleaned_html, 'html.parser')\n", - " commits = soup.select(\"li\")\n", - " all_commits.extend(commits)\n", - "\n", - " print(f\"Page {page + 1}: Found {len(commits)} commits\")\n", - "\n", - " await crawler.crawler_strategy.kill_session(session_id)\n", - " print(f\"Successfully crawled {len(all_commits)} commits across 3 pages\")\n", - "\n", - "await crawl_typescript_commits()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "EJRnYsp6yZQN" - }, - "source": [ - "### Using JsonCssExtractionStrategy for Fast Structured Output" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1ZMqIzB_8SYp" - }, - "source": [ - "The JsonCssExtractionStrategy is a powerful feature of Crawl4AI that allows for precise, structured data extraction from web pages. Here's how it works:\n", - "\n", - "1. You define a schema that describes the pattern of data you're interested in extracting.\n", - "2. The schema includes a base selector that identifies repeating elements on the page.\n", - "3. Within the schema, you define fields, each with its own selector and type.\n", - "4. 
These field selectors are applied within the context of each base selector element.\n", - "5. The strategy supports nested structures, lists within lists, and various data types.\n", - "6. You can even include computed fields for more complex data manipulation.\n", - "\n", - "This approach allows for highly flexible and precise data extraction, transforming semi-structured web content into clean, structured JSON data. It's particularly useful for extracting consistent data patterns from pages like product listings, news articles, or search results.\n", - "\n", - "For more details and advanced usage, check out the full documentation on the Crawl4AI website." - ] - }, + "cells": [ + { + "cell_type": "markdown", + "id": "0cba38e5", + "metadata": {}, + "source": [ + "# Crawl4AI 🕷️🤖\n", + "\"unclecode%2Fcrawl4ai\n", + "\n", + "[![GitHub Stars](https://img.shields.io/github/stars/unclecode/crawl4ai?style=social)](https://github.com/unclecode/crawl4ai/stargazers)\n", + "![PyPI - Downloads](https://img.shields.io/pypi/dm/Crawl4AI)\n", + "[![GitHub Forks](https://img.shields.io/github/forks/unclecode/crawl4ai?style=social)](https://github.com/unclecode/crawl4ai/network/members)\n", + "[![GitHub Issues](https://img.shields.io/github/issues/unclecode/crawl4ai)](https://github.com/unclecode/crawl4ai/issues)\n", + "[![GitHub Pull Requests](https://img.shields.io/github/issues-pr/unclecode/crawl4ai)](https://github.com/unclecode/crawl4ai/pulls)\n", + "[![License](https://img.shields.io/github/license/unclecode/crawl4ai)](https://github.com/unclecode/crawl4ai/blob/main/LICENSE)\n", + "\n", + "Crawl4AI simplifies asynchronous web crawling and data extraction, making it accessible for large language models (LLMs) and AI applications. 🆓🌐\n", + "\n", + "- GitHub Repository: [https://github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)\n", + "- Twitter: [@unclecode](https://twitter.com/unclecode)\n", + "- Website: [https://crawl4ai.com](https://crawl4ai.com)\n", + "\n", + "## 🌟 Meet the Crawl4AI Assistant: Your Copilot for Crawling\n", + "Use the [Crawl4AI GPT Assistant](https://tinyurl.com/crawl4ai-gpt) as your AI-powered copilot! With this assistant, you can:\n", + "- 🧑‍💻 Generate code for complex crawling and extraction tasks\n", + "- 💡 Get tailored support and examples\n", + "- 📘 Learn Crawl4AI faster with step-by-step guidance" + ] + }, + { + "cell_type": "markdown", + "id": "41de6458", + "metadata": {}, + "source": [ + "### **Quickstart with Crawl4AI**" + ] + }, + { + "cell_type": "markdown", + "id": "1380e951", + "metadata": {}, + "source": [ + "#### 1. **Installation**\n", + "Install Crawl4AI and necessary dependencies:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05fecfad", + "metadata": {}, + "outputs": [], + "source": [ + "# %%capture\n", + "!pip install crawl4ai\n", + "!pip install nest_asyncio\n", + "!playwright install " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "2c2a74c8", + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "import nest_asyncio\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "id": "f3c558d7", + "metadata": {}, + "source": [ + "#### 2. 
**Basic Setup and Simple Crawl**" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "003376f3", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 12, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "trCMR2T9yZQN", - "outputId": "718d36f4-cccf-40f4-8d8c-c3ba73524d16" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", - "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", - "[LOG] 🕸️ Crawling https://www.nbcnews.com/business using AsyncPlaywrightCrawlerStrategy...\n", - "[LOG] ✅ Crawled https://www.nbcnews.com/business successfully!\n", - "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 7.00 seconds\n", - "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.32 seconds\n", - "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", - "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.48 seconds.\n", - "Successfully extracted 11 news teasers\n", - "{\n", - " \"category\": \"Business News\",\n", - " \"headline\": \"NBC ripped up its Olympics playbook for 2024 \\u2014 so far, the new strategy paid off\",\n", - " \"summary\": \"The Olympics have long been key to NBCUniversal. Paris marked the 18th Olympic Games broadcast by NBC in the U.S.\",\n", - " \"time\": \"13h ago\",\n", - " \"image\": {\n", - " \"src\": \"https://media-cldnry.s-nbcnews.com/image/upload/t_focal-200x100,f_auto,q_auto:best/rockcms/2024-09/240903-nbc-olympics-ch-1344-c7a486.jpg\",\n", - " \"alt\": \"Mike Tirico.\"\n", - " },\n", - " \"link\": \"https://www.nbcnews.com/business\"\n", - "}\n" - ] - } - ], - "source": [ - "async def extract_news_teasers():\n", - " schema = {\n", - " \"name\": \"News Teaser Extractor\",\n", - " \"baseSelector\": \".wide-tease-item__wrapper\",\n", - " \"fields\": [\n", - " {\n", - " \"name\": \"category\",\n", - " \"selector\": \".unibrow span[data-testid='unibrow-text']\",\n", - " \"type\": \"text\",\n", - " },\n", - " {\n", - " \"name\": \"headline\",\n", - " \"selector\": \".wide-tease-item__headline\",\n", - " \"type\": \"text\",\n", - " },\n", - " {\n", - " \"name\": \"summary\",\n", - " \"selector\": \".wide-tease-item__description\",\n", - " \"type\": \"text\",\n", - " },\n", - " {\n", - " \"name\": \"time\",\n", - " \"selector\": \"[data-testid='wide-tease-date']\",\n", - " \"type\": \"text\",\n", - " },\n", - " {\n", - " \"name\": \"image\",\n", - " \"type\": \"nested\",\n", - " \"selector\": \"picture.teasePicture img\",\n", - " \"fields\": [\n", - " {\"name\": \"src\", \"type\": \"attribute\", \"attribute\": \"src\"},\n", - " {\"name\": \"alt\", \"type\": \"attribute\", \"attribute\": \"alt\"},\n", - " ],\n", - " },\n", - " {\n", - " \"name\": \"link\",\n", - " \"selector\": \"a[href]\",\n", - " \"type\": \"attribute\",\n", - " \"attribute\": \"href\",\n", - " },\n", - " ],\n", - " }\n", - "\n", - " extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True)\n", - "\n", - " async with AsyncWebCrawler(verbose=True) as crawler:\n", - " result = await crawler.arun(\n", - " url=\"https://www.nbcnews.com/business\",\n", - " extraction_strategy=extraction_strategy,\n", - " bypass_cache=True,\n", - " )\n", - "\n", - " assert result.success, \"Failed to crawl the page\"\n", - "\n", - " news_teasers = json.loads(result.extracted_content)\n", - " print(f\"Successfully extracted 
{len(news_teasers)} news teasers\")\n", - " print(json.dumps(news_teasers[0], indent=2))\n", - "\n", - "await extract_news_teasers()" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 1.49 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.10 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.10 seconds.\n", + "IE 11 is not supported. For an optimal experience visit our site on another browser.\n", + "\n", + "[Morning Rundown: Trump and Harris' vastly different closing pitches, why Kim Jong Un is helping Russia, and an ancient city is discovered by accident](https://www.nbcnews.com/news/harris-speech-ellipse-ancient-mayan-city-morning-rundown-rcna177973)[](https://www.nbcnews.com/news/harris-speech-ellipse-ancient-mayan-city-morning-rundown-rcna177973)\n", + "\n", + "Skip to Content\n", + "\n", + "[NBC News Logo](https://www.nbcnews.com)\n", + "\n", + "Spon\n" + ] + } + ], + "source": [ + "import asyncio\n", + "from crawl4ai import AsyncWebCrawler\n", + "\n", + "async def simple_crawl():\n", + " async with AsyncWebCrawler() as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " bypass_cache=True # By default this is False, meaning the cache will be used\n", + " )\n", + " print(result.markdown[:500]) # Print the first 500 characters\n", + " \n", + "asyncio.run(simple_crawl())" + ] + }, + { + "cell_type": "markdown", + "id": "da9b4d50", + "metadata": {}, + "source": [ + "#### 3. **Dynamic Content Handling**" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "5bb8c1e4", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "FnyVhJaByZQN" - }, - "source": [ - "## Speed Comparison\n", - "\n", - "Let's compare the speed of Crawl4AI with Firecrawl, a paid service. Note that we can't run Firecrawl in this Colab environment, so we'll simulate its performance based on previously recorded data." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://www.nbcnews.com/business using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://www.nbcnews.com/business successfully!\n", + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 4.52 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.15 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.15 seconds.\n", + "IE 11 is not supported. 
For an optimal experience visit our site on another browser.\n", + "\n", + "[Morning Rundown: Trump and Harris' vastly different closing pitches, why Kim Jong Un is helping Russia, and an ancient city is discovered by accident](https://www.nbcnews.com/news/harris-speech-ellipse-ancient-mayan-city-morning-rundown-rcna177973)[](https://www.nbcnews.com/news/harris-speech-ellipse-ancient-mayan-city-morning-rundown-rcna177973)\n", + "\n", + "Skip to Content\n", + "\n", + "[NBC News Logo](https://www.nbcnews.com)\n", + "\n", + "Spon\n" + ] + } + ], + "source": [ + "async def crawl_dynamic_content():\n", + " # You can use wait_for to wait for a condition to be met before returning the result\n", + " # wait_for = \"\"\"() => {\n", + " # return Array.from(document.querySelectorAll('article.tease-card')).length > 10;\n", + " # }\"\"\"\n", + "\n", + " # wait_for can be also just a css selector\n", + " # wait_for = \"article.tease-card:nth-child(10)\"\n", + "\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " js_code = [\n", + " \"const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();\"\n", + " ]\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " js_code=js_code,\n", + " # wait_for=wait_for,\n", + " bypass_cache=True,\n", + " )\n", + " print(result.markdown[:500]) # Print first 500 characters\n", + "\n", + "asyncio.run(crawl_dynamic_content())" + ] + }, + { + "cell_type": "markdown", + "id": "86febd8d", + "metadata": {}, + "source": [ + "#### 4. **Content Cleaning and Fit Markdown**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e8ab01f", + "metadata": {}, + "outputs": [], + "source": [ + "async def clean_content():\n", + " async with AsyncWebCrawler() as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://janineintheworld.com/places-to-visit-in-central-mexico\",\n", + " excluded_tags=['nav', 'footer', 'aside'],\n", + " remove_overlay_elements=True,\n", + " word_count_threshold=10,\n", + " bypass_cache=True\n", + " )\n", + " full_markdown_length = len(result.markdown)\n", + " fit_markdown_length = len(result.fit_markdown)\n", + " print(f\"Full Markdown Length: {full_markdown_length}\")\n", + " print(f\"Fit Markdown Length: {fit_markdown_length}\")\n", + " print(result.fit_markdown[:1000])\n", + " \n", + "\n", + "asyncio.run(clean_content())" + ] + }, + { + "cell_type": "markdown", + "id": "55715146", + "metadata": {}, + "source": [ + "#### 5. **Link Analysis and Smart Filtering**" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "2ae47c69", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "agDD186f3wig" - }, - "source": [ - "💡 **Note on Speed Comparison:**\n", - "\n", - "The speed test conducted here is running on Google Colab, where the internet speed and performance can vary and may not reflect optimal conditions. When we call Firecrawl's API, we're seeing its best performance, while Crawl4AI's performance is limited by Colab's network speed.\n", - "\n", - "For a more accurate comparison, it's recommended to run these tests on your own servers or computers with a stable and fast internet connection. 
Despite these limitations, Crawl4AI still demonstrates faster performance in this environment.\n", - "\n", - "If you run these tests locally, you may observe an even more significant speed advantage for Crawl4AI compared to other services." - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 0.93 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.11 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.11 seconds.\n", + "Found 107 internal links\n", + "Found 58 external links\n", + "Href: https://www.nbcnews.com/news/harris-speech-ellipse-ancient-mayan-city-morning-rundown-rcna177973\n", + "Text: Morning Rundown: Trump and Harris' vastly different closing pitches, why Kim Jong Un is helping Russia, and an ancient city is discovered by accident\n", + "\n", + "Href: https://www.nbcnews.com\n", + "Text: NBC News Logo\n", + "\n", + "Href: https://www.nbcnews.com/politics/2024-election/live-blog/kamala-harris-donald-trump-rally-election-live-updates-rcna177529\n", + "Text: 2024 Election\n", + "\n", + "Href: https://www.nbcnews.com/politics\n", + "Text: Politics\n", + "\n", + "Href: https://www.nbcnews.com/us-news\n", + "Text: U.S. News\n", + "\n" + ] + } + ], + "source": [ + "\n", + "async def link_analysis():\n", + " async with AsyncWebCrawler() as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " bypass_cache=True,\n", + " exclude_external_links=True,\n", + " exclude_social_media_links=True,\n", + " # exclude_domains=[\"facebook.com\", \"twitter.com\"]\n", + " )\n", + " print(f\"Found {len(result.links['internal'])} internal links\")\n", + " print(f\"Found {len(result.links['external'])} external links\")\n", + "\n", + " for link in result.links['internal'][:5]:\n", + " print(f\"Href: {link['href']}\\nText: {link['text']}\\n\")\n", + " \n", + "\n", + "asyncio.run(link_analysis())" + ] + }, + { + "cell_type": "markdown", + "id": "80cceef3", + "metadata": {}, + "source": [ + "#### 6. 
**Media Handling**" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "1fed7f99", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "F7KwHv8G1LbY" - }, - "outputs": [], - "source": [ - "!pip install firecrawl" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 1.42 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.11 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.12 seconds.\n", + "Image URL: https://media-cldnry.s-nbcnews.com/image/upload/t_focal-762x508,f_auto,q_auto:best/rockcms/2024-10/241023-NM-Chilccare-jg-27b982.jpg, Alt: , Score: 4\n", + "Image URL: https://media-cldnry.s-nbcnews.com/image/upload/t_focal-80x80,f_auto,q_auto:best/rockcms/2024-10/241030-china-ev-electric-mb-0746-cae05c.jpg, Alt: Volkswagen Workshop in Hefei, Score: 5\n", + "Image URL: https://media-cldnry.s-nbcnews.com/image/upload/t_focal-80x80,f_auto,q_auto:best/rockcms/2024-10/241029-nyc-subway-sandwich-2021-ac-922p-a92374.jpg, Alt: A sub is prepared at a Subway restaurant in Manhattan, New York City, Score: 5\n", + "Image URL: https://media-cldnry.s-nbcnews.com/image/upload/t_focal-80x80,f_auto,q_auto:best/rockcms/2024-10/241029-suv-gravity-ch-1618-752415.jpg, Alt: The Lucid Gravity car., Score: 5\n", + "Image URL: https://media-cldnry.s-nbcnews.com/image/upload/t_focal-80x80,f_auto,q_auto:best/rockcms/2024-10/241029-dearborn-michigan-f-150-ford-ranger-trucks-assembly-line-ac-426p-614f0b.jpg, Alt: Ford Introduces new F-150 And Ranger Trucks At Their Dearborn Plant, Score: 5\n" + ] + } + ], + "source": [ + "async def media_handling():\n", + " async with AsyncWebCrawler() as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\", \n", + " bypass_cache=True,\n", + " exclude_external_images=False,\n", + " screenshot=True\n", + " )\n", + " for img in result.media['images'][:5]:\n", + " print(f\"Image URL: {img['src']}, Alt: {img['alt']}, Score: {img['score']}\")\n", + " \n", + "asyncio.run(media_handling())" + ] + }, + { + "cell_type": "markdown", + "id": "9290499a", + "metadata": {}, + "source": [ + "#### 7. **Using Hooks for Custom Workflow**" + ] + }, + { + "cell_type": "markdown", + "id": "9d069c2b", + "metadata": {}, + "source": [ + "Hooks in Crawl4AI allow you to run custom logic at specific stages of the crawling process. This can be invaluable for scenarios like setting custom headers, logging activities, or processing content before it is returned. Below is an example of a basic workflow using a hook, followed by a complete list of available hooks and explanations on their usage." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "bc4d2fc8", + "metadata": {}, + "outputs": [ { - "cell_type": "code", - "execution_count": 4, - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "91813zILyZQN", - "outputId": "663223db-ab89-4976-b233-05ceca62b19b" - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Firecrawl (simulated):\n", - "Time taken: 4.38 seconds\n", - "Content length: 41967 characters\n", - "Images found: 49\n", - "\n", - "Crawl4AI (simple crawl):\n", - "Time taken: 4.22 seconds\n", - "Content length: 18221 characters\n", - "Images found: 49\n", - "\n", - "Crawl4AI (with JavaScript execution):\n", - "Time taken: 9.13 seconds\n", - "Content length: 34243 characters\n", - "Images found: 89\n" - ] - } - ], - "source": [ - "import os\n", - "from google.colab import userdata\n", - "os.environ['FIRECRAWL_API_KEY'] = userdata.get('FIRECRAWL_API_KEY')\n", - "import time\n", - "from firecrawl import FirecrawlApp\n", - "\n", - "async def speed_comparison():\n", - " # Simulated Firecrawl performance\n", - " app = FirecrawlApp(api_key=os.environ['FIRECRAWL_API_KEY'])\n", - " start = time.time()\n", - " scrape_status = app.scrape_url(\n", - " 'https://www.nbcnews.com/business',\n", - " params={'formats': ['markdown', 'html']}\n", - " )\n", - " end = time.time()\n", - " print(\"Firecrawl (simulated):\")\n", - " print(f\"Time taken: {end - start:.2f} seconds\")\n", - " print(f\"Content length: {len(scrape_status['markdown'])} characters\")\n", - " print(f\"Images found: {scrape_status['markdown'].count('cldnry.s-nbcnews.com')}\")\n", - " print()\n", - "\n", - " async with AsyncWebCrawler() as crawler:\n", - " # Crawl4AI simple crawl\n", - " start = time.time()\n", - " result = await crawler.arun(\n", - " url=\"https://www.nbcnews.com/business\",\n", - " word_count_threshold=0,\n", - " bypass_cache=True,\n", - " verbose=False\n", - " )\n", - " end = time.time()\n", - " print(\"Crawl4AI (simple crawl):\")\n", - " print(f\"Time taken: {end - start:.2f} seconds\")\n", - " print(f\"Content length: {len(result.markdown)} characters\")\n", - " print(f\"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}\")\n", - " print()\n", - "\n", - " # Crawl4AI with JavaScript execution\n", - " start = time.time()\n", - " result = await crawler.arun(\n", - " url=\"https://www.nbcnews.com/business\",\n", - " js_code=[\"const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();\"],\n", - " word_count_threshold=0,\n", - " bypass_cache=True,\n", - " verbose=False\n", - " )\n", - " end = time.time()\n", - " print(\"Crawl4AI (with JavaScript execution):\")\n", - " print(f\"Time taken: {end - start:.2f} seconds\")\n", - " print(f\"Content length: {len(result.markdown)} characters\")\n", - " print(f\"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}\")\n", - "\n", - "await speed_comparison()" - ] - }, + "name": "stdout", + "output_type": "stream", + "text": [ + "[Hook] Preparing to navigate...\n", + "[LOG] 🚀 Crawling done for https://crawl4ai.com, success: True, time taken: 3.49 seconds\n", + "[LOG] 🚀 Content extracted for https://crawl4ai.com, success: True, time taken: 0.03 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://crawl4ai.com, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://crawl4ai.com, time taken: 0.03 seconds.\n", + "[Crawl4AI 
Documentation](https://docs.crawl4ai.com/)\n", + "\n", + " * [ Home ](.)\n", + " * [ Installation ](basic/installation/)\n", + " * [ Quick Start ](basic/quickstart/)\n", + " * [ Search ](#)\n", + "\n", + "\n", + "\n", + " * Home\n", + " * [Installation](basic/installation/)\n", + " * [Quick Start](basic/quickstart/)\n", + " * Basic\n", + " * [Simple Crawling](basic/simple-crawling/)\n", + " * [Output Formats](basic/output-formats/)\n", + " * [Browser Configuration](basic/browser-config/)\n", + " * [Page Interaction](basic/page-interaction/)\n", + " * [Content Selection](basic/con\n" + ] + } + ], + "source": [ + "async def custom_hook_workflow():\n", + " async with AsyncWebCrawler() as crawler:\n", + " # Set a 'before_goto' hook to run custom code just before navigation\n", + " crawler.crawler_strategy.set_hook(\"before_goto\", lambda page: print(\"[Hook] Preparing to navigate...\"))\n", + " \n", + " # Perform the crawl operation\n", + " result = await crawler.arun(\n", + " url=\"https://crawl4ai.com\",\n", + " bypass_cache=True\n", + " )\n", + " print(result.markdown[:500]) # Display the first 500 characters\n", + "\n", + "asyncio.run(custom_hook_workflow())" + ] + }, + { + "cell_type": "markdown", + "id": "3ff45e21", + "metadata": {}, + "source": [ + "List of available hooks and examples for each stage of the crawling process:\n", + "\n", + "- **on_browser_created**\n", + " ```python\n", + " async def on_browser_created_hook(browser):\n", + " print(\"[Hook] Browser created\")\n", + " ```\n", + "\n", + "- **before_goto**\n", + " ```python\n", + " async def before_goto_hook(page):\n", + " await page.set_extra_http_headers({\"X-Test-Header\": \"test\"})\n", + " ```\n", + "\n", + "- **after_goto**\n", + " ```python\n", + " async def after_goto_hook(page):\n", + " print(f\"[Hook] Navigated to {page.url}\")\n", + " ```\n", + "\n", + "- **on_execution_started**\n", + " ```python\n", + " async def on_execution_started_hook(page):\n", + " print(\"[Hook] JavaScript execution started\")\n", + " ```\n", + "\n", + "- **before_return_html**\n", + " ```python\n", + " async def before_return_html_hook(page, html):\n", + " print(f\"[Hook] HTML length: {len(html)}\")\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "id": "2d56ebb1", + "metadata": {}, + "source": [ + "#### 8. **Session-Based Crawling**\n", + "\n", + "When to Use Session-Based Crawling: \n", + "Session-based crawling is especially beneficial when navigating through multi-page content where each page load needs to maintain the same session context. For instance, in cases where a “Next Page” button must be clicked to load subsequent data, the new data often replaces the previous content. Here, session-based crawling keeps the browser state intact across each interaction, allowing for sequential actions within the same session.\n", + "\n", + "Example: Multi-Page Navigation Using JavaScript\n", + "In this example, we’ll navigate through multiple pages by clicking a \"Next Page\" button. After each page load, we extract the new content and repeat the process." 
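When the "Next Page" control only swaps content in place rather than loading a new URL (as in the GitHub commits example elsewhere in this changeset), subsequent calls in the same session can also pass `js_only=True` so the crawler runs the click without re-navigating. A minimal sketch, assuming a hypothetical `.next-page-button` selector like the cell that follows:

```python
# Sketch only: keep one session and click "Next" in place, without re-navigating.
# `crawler` is an AsyncWebCrawler obtained via `async with AsyncWebCrawler() as crawler`.
async def click_through_in_place(crawler, url, pages=3):
    session_id = "in_place_pagination"  # any stable identifier for the session
    js_next = "document.querySelector('.next-page-button')?.click();"
    for i in range(pages):
        result = await crawler.arun(
            url=url,
            session_id=session_id,
            js_code=js_next if i > 0 else None,  # click only after the first load
            js_only=i > 0,                       # reuse the already-loaded page
            bypass_cache=True,
        )
        print(f"Pass {i + 1}: {len(result.markdown)} characters")
    await crawler.crawler_strategy.kill_session(session_id)
```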
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7bfebae", + "metadata": {}, + "outputs": [], + "source": [ + "async def multi_page_session_crawl():\n", + " async with AsyncWebCrawler() as crawler:\n", + " session_id = \"page_navigation_session\"\n", + " url = \"https://example.com/paged-content\"\n", + "\n", + " for page_number in range(1, 4):\n", + " result = await crawler.arun(\n", + " url=url,\n", + " session_id=session_id,\n", + " js_code=\"document.querySelector('.next-page-button').click();\" if page_number > 1 else None,\n", + " css_selector=\".content-section\",\n", + " bypass_cache=True\n", + " )\n", + " print(f\"Page {page_number} Content:\")\n", + " print(result.markdown[:500]) # Print first 500 characters\n", + "\n", + "# asyncio.run(multi_page_session_crawl())" + ] + }, + { + "cell_type": "markdown", + "id": "ad32a778", + "metadata": {}, + "source": [ + "#### 9. **Using Extraction Strategies**\n", + "\n", + "**LLM Extraction**\n", + "\n", + "This example demonstrates how to use language model-based extraction to retrieve structured data from a pricing page on OpenAI’s site." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "3011a7c5", + "metadata": {}, + "outputs": [ { - "cell_type": "markdown", - "metadata": { - "id": "OBFFYVJIyZQN" - }, - "source": [ - "If you run on a local machine with a proper internet speed:\n", - "- Simple crawl: Crawl4AI is typically over 3-4 times faster than Firecrawl.\n", - "- With JavaScript execution: Even when executing JavaScript to load more content (potentially doubling the number of images found), Crawl4AI is still faster than Firecrawl's simple crawl.\n", - "\n", - "Please note that actual performance may vary depending on network conditions and the specific content being crawled." 
- ] + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "--- Extracting Structured Data with openai/gpt-4o-mini ---\n", + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://openai.com/api/pricing/ using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://openai.com/api/pricing/ successfully!\n", + "[LOG] 🚀 Crawling done for https://openai.com/api/pricing/, success: True, time taken: 1.29 seconds\n", + "[LOG] 🚀 Content extracted for https://openai.com/api/pricing/, success: True, time taken: 0.13 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://openai.com/api/pricing/, Strategy: AsyncWebCrawler\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 0\n", + "[LOG] Extracted 26 blocks from URL: https://openai.com/api/pricing/ block index: 0\n", + "[LOG] 🚀 Extraction done for https://openai.com/api/pricing/, time taken: 15.12 seconds.\n", + "[{'model_name': 'gpt-4o', 'input_fee': '$2.50 / 1M input tokens', 'output_fee': '$10.00 / 1M output tokens', 'error': False}, {'model_name': 'gpt-4o-2024-08-06', 'input_fee': '$2.50 / 1M input tokens', 'output_fee': '$10.00 / 1M output tokens', 'error': False}, {'model_name': 'gpt-4o-audio-preview', 'input_fee': '$2.50 / 1M input tokens', 'output_fee': '$10.00 / 1M output tokens', 'error': False}, {'model_name': 'gpt-4o-audio-preview-2024-10-01', 'input_fee': '$2.50 / 1M input tokens', 'output_fee': '$10.00 / 1M output tokens', 'error': False}, {'model_name': 'gpt-4o-2024-05-13', 'input_fee': '$5.00 / 1M input tokens', 'output_fee': '$15.00 / 1M output tokens', 'error': False}]\n" + ] }, { - "cell_type": "markdown", - "metadata": { - "id": "A6_1RK1_yZQO" - }, - "source": [ - "## Conclusion\n", - "\n", - "In this notebook, we've explored the powerful features of Crawl4AI, including:\n", - "\n", - "1. Basic crawling\n", - "2. JavaScript execution and CSS selector usage\n", - "3. Proxy support\n", - "4. Structured data extraction with OpenAI\n", - "5. Advanced multi-page crawling with JavaScript execution\n", - "6. Fast structured output using JsonCssExtractionStrategy\n", - "7. Speed comparison with other services\n", - "\n", - "Crawl4AI offers a fast, flexible, and powerful solution for web crawling and data extraction tasks. Its asynchronous architecture and advanced features make it suitable for a wide range of applications, from simple web scraping to complex, multi-page data extraction scenarios.\n", - "\n", - "For more information and advanced usage, please visit the [Crawl4AI documentation](https://crawl4ai.com/mkdocs/).\n", - "\n", - "Happy crawling!" 
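The structured output printed above is a JSON string. One way to sanity-check it (a sketch, not part of the notebook) is to re-validate each record against the same `OpenAIModelFee` schema that the extraction cell defines:

```python
import json
from pydantic import BaseModel, Field

# Mirrors the OpenAIModelFee schema used by the extraction strategy in this cell.
class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
    input_fee: str = Field(..., description="Fee for input token for the OpenAI model.")
    output_fee: str = Field(..., description="Fee for output token for the OpenAI model.")

def validate_fees(extracted_content: str) -> list:
    records = json.loads(extracted_content)
    # Each extracted block also carries a bookkeeping 'error' flag; drop it here.
    return [
        OpenAIModelFee(**{k: v for k, v in record.items() if k != "error"})
        for record in records
    ]
```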
- ] + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/unclecode/devs/crawl4ai/venv/lib/python3.10/site-packages/pydantic/main.py:347: UserWarning: Pydantic serializer warnings:\n", + " Expected `PromptTokensDetails` but got `dict` - serialized value may not be as expected\n", + " return self.__pydantic_serializer__.to_python(\n" + ] } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.13" + ], + "source": [ + "from crawl4ai.extraction_strategy import LLMExtractionStrategy\n", + "from pydantic import BaseModel, Field\n", + "import os, json\n", + "\n", + "class OpenAIModelFee(BaseModel):\n", + " model_name: str = Field(..., description=\"Name of the OpenAI model.\")\n", + " input_fee: str = Field(..., description=\"Fee for input token for the OpenAI model.\")\n", + " output_fee: str = Field(\n", + " ..., description=\"Fee for output token for the OpenAI model.\"\n", + " )\n", + "\n", + "async def extract_structured_data_using_llm(provider: str, api_token: str = None, extra_headers: dict = None):\n", + " print(f\"\\n--- Extracting Structured Data with {provider} ---\")\n", + " \n", + " # Skip if API token is missing (for providers that require it)\n", + " if api_token is None and provider != \"ollama\":\n", + " print(f\"API token is required for {provider}. Skipping this example.\")\n", + " return\n", + "\n", + " extra_args = {\"extra_headers\": extra_headers} if extra_headers else {}\n", + "\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://openai.com/api/pricing/\",\n", + " word_count_threshold=1,\n", + " extraction_strategy=LLMExtractionStrategy(\n", + " provider=provider,\n", + " api_token=api_token,\n", + " schema=OpenAIModelFee.schema(),\n", + " extraction_type=\"schema\",\n", + " instruction=\"\"\"Extract all model names along with fees for input and output tokens.\"\n", + " \"{model_name: 'GPT-4', input_fee: 'US$10.00 / 1M tokens', output_fee: 'US$30.00 / 1M tokens'}.\"\"\",\n", + " **extra_args\n", + " ),\n", + " bypass_cache=True,\n", + " )\n", + " print(json.loads(result.extracted_content)[:5])\n", + "\n", + "# Usage:\n", + "await extract_structured_data_using_llm(\"openai/gpt-4o-mini\", os.getenv(\"OPENAI_API_KEY\"))" + ] + }, + { + "cell_type": "markdown", + "id": "6532db9d", + "metadata": {}, + "source": [ + "**Cosine Similarity Strategy**\n", + "\n", + "This strategy uses semantic clustering to extract relevant content based on contextual similarity, which is helpful when extracting related sections from a single topic." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "ec079108", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] Loading Extraction Model for mps device.\n", + "[LOG] Loading Multilabel Classifier for mps device.\n", + "[LOG] Model loaded sentence-transformers/all-MiniLM-L6-v2, models/reuters, took 5.193778038024902 seconds\n", + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business/consumer/how-mcdonalds-e-coli-crisis-inflation-politics-reflect-american-story-rcna177156, success: True, time taken: 1.37 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business/consumer/how-mcdonalds-e-coli-crisis-inflation-politics-reflect-american-story-rcna177156, success: True, time taken: 0.07 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business/consumer/how-mcdonalds-e-coli-crisis-inflation-politics-reflect-american-story-rcna177156, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Assign tags using mps\n", + "[LOG] 🚀 Categorization done in 0.55 seconds\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business/consumer/how-mcdonalds-e-coli-crisis-inflation-politics-reflect-american-story-rcna177156, time taken: 6.63 seconds.\n", + "[{'index': 1, 'tags': ['news_&_social_concern'], 'content': \"McDonald's 2024 combo: Inflation, a health crisis and a side of politics # McDonald's 2024 combo: Inflation, a health crisis and a side of politics\"}, {'index': 2, 'tags': ['business_&_entrepreneurs', 'news_&_social_concern'], 'content': 'Like many major brands, McDonald’s raked in big profits as the economy reopened from the pandemic. In October 2022, [executives were boasting](https://www.cnbc.com/2022/10/27/mcdonalds-mcd-earnings-q3-2022.html) that they’d been raising prices without crimping traffic, even as competitors began to warn that some customers were closing their wallets after inflation peaked above 9% that summer. Still, the U.S. had repeatedly dodged a much-forecast recession, and [Americans kept spending on nonessentials](https://www.nbcnews.com/business/economy/year-peak-inflation-travel-leisure-mostly-cost-less-rcna92760) like travel and dining out — despite regularly relaying to pollsters their dismal views of an otherwise solid economy. Even so, 64% of consumers said they noticed price increases at quick-service restaurants in September, more than at any other type of venue, according to a survey by Datassential, a food and beverage market researcher. Politicians are still drawing attention to fast-food costs, too, as the election season barrels toward a tumultuous finish. A group of Democratic senators this month [denounced McDonald’s for menu prices](https://www.nbcnews.com/news/us-news/democratic-senators-slam-mcdonalds-menu-price-hikes-rcna176380) that they said outstripped inflation, accusing the company of looking to profit “at the expense of people’s ability to put food on the table.” The financial results come toward the end of a humbling year for the nearly $213 billion restaurant chain, whose shares remained steady on the heels of its latest earnings. Kempczinski [sought to reassure investors](https://www.cnbc.com/2024/10/29/mcdonalds-e-coli-outbreak-ceo-comments.html) that [the E. coli outbreak](https://www.nbcnews.com/health/health-news/illnesses-linked-mcdonalds-e-coli-outbreak-rise-75-cdc-says-rcna177260), linked to Quarter Pounder burgers, was under control after the health crisis temporarily dented the company’s stock and caused U.S. 
foot traffic to drop nearly 10% in the days afterward, according to estimates by Gordon Haskett financial researchers. The fast-food giant [reported Tuesday](https://www.cnbc.com/2024/10/29/mcdonalds-mcd-earnings-q3-2024.html) that it had reversed its recent U.S. sales drop, posting a 0.3% uptick in the third quarter. Foot traffic was still down slightly, but the company said its summer of discounts was paying off. But by early this year, [photos of eye-watering menu prices](https://x.com/sam_learner/status/1681367351143301129) at some McDonald’s locations — including an $18 Big Mac combo at a Connecticut rest stop from July 2023 — went viral, bringing diners’ long-simmering frustrations to a boiling point that the company couldn’t ignore. On an earnings call in April, Kempczinski acknowledged that foot traffic had fallen. “We will stay laser-focused on providing an unparalleled experience with simple, everyday value and affordability that our consumers can count on as they continue to be mindful about their spending,” CEO Chris Kempczinski [said in a statement](https://www.prnewswire.com/news-releases/mcdonalds-reports-third-quarter-2024-results-302289216.html?Fds-Load-Behavior=force-external) alongside the earnings report.'}, {'index': 3, 'tags': ['food_&_dining', 'news_&_social_concern'], 'content': '![mcdonalds drive-thru economy fast food](https://media-cldnry.s-nbcnews.com/image/upload/t_fit-760w,f_auto,q_auto:best/rockcms/2024-10/241024-los-angeles-mcdonalds-drive-thru-ac-1059p-cfc311.jpg)McDonald’s has had some success leaning into discounts this year. Eric Thayer / Bloomberg via Getty Images file'}, {'index': 4, 'tags': ['business_&_entrepreneurs', 'food_&_dining', 'news_&_social_concern'], 'content': 'McDonald’s has faced a customer revolt over pricey Big Macs, an unsolicited cameo in election-season crossfire, and now an E. coli outbreak — just as the company had been luring customers back with more affordable burgers. Despite a difficult quarter, McDonald’s looks resilient in the face of various pressures, analysts say — something the company shares with U.S. consumers overall. “Consumers continue to be even more discriminating with every dollar that they spend,” he said at the time. Going forward, McDonald’s would be “laser-focused” on affordability. “McDonald’s has also done a good job of embedding the brand in popular culture to enhance its relevance and meaning around fun and family. But it also needed to modify the product line to meet the expectations of a consumer who is on a tight budget,” he said. “The thing that McDonald’s had struggled with, and why I think we’re seeing kind of an inflection point, is a value proposition,” Senatore said. “McDonald’s menu price increases had run ahead of a lot of its restaurant peers. … Consumers are savvy enough to know that.” For many consumers, the fast-food giant’s menus serve as an informal gauge of the economy overall, said Sara Senatore, a Bank of America analyst covering restaurants. “The spotlight is always on McDonald’s because it’s so big” and something of a “bellwether,” she said. McDonald’s didn’t respond to requests for comment.'}, {'index': 5, 'tags': ['business_&_entrepreneurs', 'food_&_dining'], 'content': 'Mickey D’s’ $5 meal deal, which it launched in late June to jumpstart slumping sales, has given the company an appealing price point to advertise nationwide, Senatore said, speculating that it could open the door to a new permanent value offering. 
But before that promotion rolled out, the company’s reputation as a low-cost option had taken a bruising hit.'}]\n" + ] } + ], + "source": [ + "from crawl4ai.extraction_strategy import CosineStrategy\n", + "\n", + "async def cosine_similarity_extraction():\n", + " async with AsyncWebCrawler() as crawler:\n", + " strategy = CosineStrategy(\n", + " word_count_threshold=10,\n", + " max_dist=0.2, # Maximum distance between two words\n", + " linkage_method=\"ward\", # Linkage method for hierarchical clustering (ward, complete, average, single)\n", + " top_k=3, # Number of top keywords to extract\n", + " sim_threshold=0.3, # Similarity threshold for clustering\n", + " semantic_filter=\"McDonald's economic impact, American consumer trends\", # Keywords to filter the content semantically using embeddings\n", + " verbose=True\n", + " )\n", + " \n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business/consumer/how-mcdonalds-e-coli-crisis-inflation-politics-reflect-american-story-rcna177156\",\n", + " extraction_strategy=strategy\n", + " )\n", + " print(json.loads(result.extracted_content)[:5])\n", + "\n", + "asyncio.run(cosine_similarity_extraction())\n" + ] + }, + { + "cell_type": "markdown", + "id": "ff423629", + "metadata": {}, + "source": [ + "#### 10. **Conclusion and Next Steps**\n", + "\n", + "You’ve explored core features of Crawl4AI, including dynamic content handling, link analysis, and advanced extraction strategies. Visit our documentation for further details on using Crawl4AI’s extensive features.\n", + "\n", + "- GitHub Repository: [https://github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)\n", + "- Twitter: [@unclecode](https://twitter.com/unclecode)\n", + "- Website: [https://crawl4ai.com](https://crawl4ai.com)\n", + "\n", + "Happy Crawling with Crawl4AI! 🕷️🤖\n" + ] + }, + { + "cell_type": "markdown", + "id": "d34c1d35", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" }, - "nbformat": 4, - "nbformat_minor": 0 + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 } diff --git a/docs/examples/quickstart_async.py b/docs/examples/quickstart_async.py index f6c16a4..d67a8c3 100644 --- a/docs/examples/quickstart_async.py +++ b/docs/examples/quickstart_async.py @@ -10,7 +10,7 @@ import json import os import re -from typing import Dict +from typing import Dict, List from bs4 import BeautifulSoup from pydantic import BaseModel, Field from crawl4ai import AsyncWebCrawler @@ -71,12 +71,12 @@ async def use_proxy(): "Note: Replace 'http://your-proxy-url:port' with a working proxy to run this example." 
) # Uncomment and modify the following lines to use a proxy - # async with AsyncWebCrawler(verbose=True, proxy="http://your-proxy-url:port") as crawler: - # result = await crawler.arun( - # url="https://www.nbcnews.com/business", - # bypass_cache=True - # ) - # print(result.markdown[:500]) # Print first 500 characters + async with AsyncWebCrawler(verbose=True, proxy="http://your-proxy-url:port") as crawler: + result = await crawler.arun( + url="https://www.nbcnews.com/business", + bypass_cache=True + ) + print(result.markdown[:500]) # Print first 500 characters async def capture_and_save_screenshot(url: str, output_path: str): async with AsyncWebCrawler(verbose=True) as crawler: @@ -379,6 +379,19 @@ async def crawl_custom_browser_type(): print(result.markdown[:500]) print("Time taken: ", time.time() - start) +async def crawl_with_user_simultion(): + async with AsyncWebCrawler(verbose=True, headless=True) as crawler: + url = "YOUR-URL-HERE" + result = await crawler.arun( + url=url, + bypass_cache=True, + magic = True, # Automatically detects and removes overlays, popups, and other elements that block content + # simulate_user = True,# Causes a series of random mouse movements and clicks to simulate user interaction + # override_navigator = True # Overrides the navigator object to make it look like a real user + ) + + print(result.markdown) + async def speed_comparison(): # print("\n--- Speed Comparison ---") # print("Firecrawl (simulated):") @@ -444,6 +457,57 @@ async def speed_comparison(): print("If you run these tests in an environment with better network conditions,") print("you may observe an even more significant speed advantage for Crawl4AI.") +async def generate_knowledge_graph(): + class Entity(BaseModel): + name: str + description: str + + class Relationship(BaseModel): + entity1: Entity + entity2: Entity + description: str + relation_type: str + + class KnowledgeGraph(BaseModel): + entities: List[Entity] + relationships: List[Relationship] + + extraction_strategy = LLMExtractionStrategy( + provider='openai/gpt-4o-mini', # Or any other provider, including Ollama and open source models + api_token=os.getenv('OPENAI_API_KEY'), # In case of Ollama just pass "no-token" + schema=KnowledgeGraph.model_json_schema(), + extraction_type="schema", + instruction="""Extract entities and relationships from the given text.""" + ) + async with AsyncWebCrawler() as crawler: + url = "https://paulgraham.com/love.html" + result = await crawler.arun( + url=url, + bypass_cache=True, + extraction_strategy=extraction_strategy, + # magic=True + ) + # print(result.extracted_content) + with open(os.path.join(__location__, "kb.json"), "w") as f: + f.write(result.extracted_content) + +async def fit_markdown_remove_overlay(): + async with AsyncWebCrawler(headless = False) as crawler: + url = "https://janineintheworld.com/places-to-visit-in-central-mexico" + result = await crawler.arun( + url=url, + bypass_cache=True, + word_count_threshold = 10, + remove_overlay_elements=True, + screenshot = True + ) + # Save markdown to file + with open(os.path.join(__location__, "mexico_places.md"), "w") as f: + f.write(result.fit_markdown) + + print("Done") + + async def main(): await simple_crawl() await simple_example_with_running_js_code() @@ -455,7 +519,7 @@ async def main(): # LLM extraction examples await extract_structured_data_using_llm() await extract_structured_data_using_llm("huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct", os.getenv("HUGGINGFACE_API_KEY")) - await 
extract_structured_data_using_llm("openai/gpt-4", os.getenv("OPENAI_API_KEY")) + await extract_structured_data_using_llm("openai/gpt-4o", os.getenv("OPENAI_API_KEY")) await extract_structured_data_using_llm("ollama/llama3.2") # You always can pass custom headers to the extraction strategy diff --git a/docs/examples/quickstart_v0.ipynb b/docs/examples/quickstart_v0.ipynb new file mode 100644 index 0000000..71f23ac --- /dev/null +++ b/docs/examples/quickstart_v0.ipynb @@ -0,0 +1,735 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "6yLvrXn7yZQI" + }, + "source": [ + "# Crawl4AI: Advanced Web Crawling and Data Extraction\n", + "\n", + "Welcome to this interactive notebook showcasing Crawl4AI, an advanced asynchronous web crawling and data extraction library.\n", + "\n", + "- GitHub Repository: [https://github.com/unclecode/crawl4ai](https://github.com/unclecode/crawl4ai)\n", + "- Twitter: [@unclecode](https://twitter.com/unclecode)\n", + "- Website: [https://crawl4ai.com](https://crawl4ai.com)\n", + "\n", + "Let's explore the powerful features of Crawl4AI!" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KIn_9nxFyZQK" + }, + "source": [ + "## Installation\n", + "\n", + "First, let's install Crawl4AI from GitHub:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "mSnaxLf3zMog" + }, + "outputs": [], + "source": [ + "!sudo apt-get update && sudo apt-get install -y libwoff1 libopus0 libwebp6 libwebpdemux2 libenchant1c2a libgudev-1.0-0 libsecret-1-0 libhyphen0 libgdk-pixbuf2.0-0 libegl1 libnotify4 libxslt1.1 libevent-2.1-7 libgles2 libvpx6 libxcomposite1 libatk1.0-0 libatk-bridge2.0-0 libepoxy0 libgtk-3-0 libharfbuzz-icu0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xlXqaRtayZQK" + }, + "outputs": [], + "source": [ + "!pip install crawl4ai\n", + "!pip install nest-asyncio\n", + "!playwright install" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qKCE7TI7yZQL" + }, + "source": [ + "Now, let's import the necessary libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "id": "I67tr7aAyZQL" + }, + "outputs": [], + "source": [ + "import asyncio\n", + "import nest_asyncio\n", + "from crawl4ai import AsyncWebCrawler\n", + "from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy\n", + "import json\n", + "import time\n", + "from pydantic import BaseModel, Field\n", + "\n", + "nest_asyncio.apply()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "h7yR_Rt_yZQM" + }, + "source": [ + "## Basic Usage\n", + "\n", + "Let's start with a simple crawl example:" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "yBh6hf4WyZQM", + "outputId": "0f83af5c-abba-4175-ed95-70b7512e6bcc" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.05 seconds\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.05 seconds.\n", + "18102\n" + ] + } + ], + "source": [ + "async def simple_crawl():\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " result = await crawler.arun(url=\"https://www.nbcnews.com/business\")\n", + " print(len(result.markdown))\n", + 
"await simple_crawl()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9rtkgHI28uI4" + }, + "source": [ + "💡 By default, **Crawl4AI** caches the result of every URL, so the next time you call it, you’ll get an instant result. But if you want to bypass the cache, just set `bypass_cache=True`." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "MzZ0zlJ9yZQM" + }, + "source": [ + "## Advanced Features\n", + "\n", + "### Executing JavaScript and Using CSS Selectors" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gHStF86xyZQM", + "outputId": "34d0fb6d-4dec-4677-f76e-85a1f082829b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://www.nbcnews.com/business using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://www.nbcnews.com/business successfully!\n", + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 6.06 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.10 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.11 seconds.\n", + "41135\n" + ] + } + ], + "source": [ + "async def js_and_css():\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " js_code = [\"const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();\"]\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " js_code=js_code,\n", + " # css_selector=\"YOUR_CSS_SELECTOR_HERE\",\n", + " bypass_cache=True\n", + " )\n", + " print(len(result.markdown))\n", + "\n", + "await js_and_css()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "cqE_W4coyZQM" + }, + "source": [ + "### Using a Proxy\n", + "\n", + "Note: You'll need to replace the proxy URL with a working proxy for this example to run successfully." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QjAyiAGqyZQM" + }, + "outputs": [], + "source": [ + "async def use_proxy():\n", + " async with AsyncWebCrawler(verbose=True, proxy=\"http://your-proxy-url:port\") as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " bypass_cache=True\n", + " )\n", + " print(result.markdown[:500]) # Print first 500 characters\n", + "\n", + "# Uncomment the following line to run the proxy example\n", + "# await use_proxy()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "XTZ88lbayZQN" + }, + "source": [ + "### Extracting Structured Data with OpenAI\n", + "\n", + "Note: You'll need to set your OpenAI API key as an environment variable for this example to work." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "fIOlDayYyZQN", + "outputId": "cb8359cc-dee0-4762-9698-5dfdcee055b8" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://openai.com/api/pricing/ using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://openai.com/api/pricing/ successfully!\n", + "[LOG] 🚀 Crawling done for https://openai.com/api/pricing/, success: True, time taken: 3.77 seconds\n", + "[LOG] 🚀 Content extracted for https://openai.com/api/pricing/, success: True, time taken: 0.21 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://openai.com/api/pricing/, Strategy: AsyncWebCrawler\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 0\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 1\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 2\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 3\n", + "[LOG] Extracted 4 blocks from URL: https://openai.com/api/pricing/ block index: 3\n", + "[LOG] Call LLM for https://openai.com/api/pricing/ - block index: 4\n", + "[LOG] Extracted 5 blocks from URL: https://openai.com/api/pricing/ block index: 0\n", + "[LOG] Extracted 1 blocks from URL: https://openai.com/api/pricing/ block index: 4\n", + "[LOG] Extracted 8 blocks from URL: https://openai.com/api/pricing/ block index: 1\n", + "[LOG] Extracted 12 blocks from URL: https://openai.com/api/pricing/ block index: 2\n", + "[LOG] 🚀 Extraction done for https://openai.com/api/pricing/, time taken: 8.55 seconds.\n", + "5029\n" + ] + } + ], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')\n", + "\n", + "class OpenAIModelFee(BaseModel):\n", + " model_name: str = Field(..., description=\"Name of the OpenAI model.\")\n", + " input_fee: str = Field(..., description=\"Fee for input token for the OpenAI model.\")\n", + " output_fee: str = Field(..., description=\"Fee for output token for the OpenAI model.\")\n", + "\n", + "async def extract_openai_fees():\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " result = await crawler.arun(\n", + " url='https://openai.com/api/pricing/',\n", + " word_count_threshold=1,\n", + " extraction_strategy=LLMExtractionStrategy(\n", + " provider=\"openai/gpt-4o\", api_token=os.getenv('OPENAI_API_KEY'),\n", + " schema=OpenAIModelFee.schema(),\n", + " extraction_type=\"schema\",\n", + " instruction=\"\"\"From the crawled content, extract all mentioned model names along with their fees for input and output tokens.\n", + " Do not miss any models in the entire content. 
One extracted model JSON format should look like this:\n", + " {\"model_name\": \"GPT-4\", \"input_fee\": \"US$10.00 / 1M tokens\", \"output_fee\": \"US$30.00 / 1M tokens\"}.\"\"\"\n", + " ),\n", + " bypass_cache=True,\n", + " )\n", + " print(len(result.extracted_content))\n", + "\n", + "# Uncomment the following line to run the OpenAI extraction example\n", + "await extract_openai_fees()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BypA5YxEyZQN" + }, + "source": [ + "### Advanced Multi-Page Crawling with JavaScript Execution" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tfkcVQ0b7mw-" + }, + "source": [ + "## Advanced Multi-Page Crawling with JavaScript Execution\n", + "\n", + "This example demonstrates Crawl4AI's ability to handle complex crawling scenarios, specifically extracting commits from multiple pages of a GitHub repository. The challenge here is that clicking the \"Next\" button doesn't load a new page, but instead uses asynchronous JavaScript to update the content. This is a common hurdle in modern web crawling.\n", + "\n", + "To overcome this, we use Crawl4AI's custom JavaScript execution to simulate clicking the \"Next\" button, and implement a custom hook to detect when new data has loaded. Our strategy involves comparing the first commit's text before and after \"clicking\" Next, waiting until it changes to confirm new data has rendered. This showcases Crawl4AI's flexibility in handling dynamic content and its ability to implement custom logic for even the most challenging crawling tasks." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "qUBKGpn3yZQN", + "outputId": "3e555b6a-ed33-42f4-cce9-499a923fbe17" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", + "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 5.16 seconds\n", + "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.28 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.28 seconds.\n", + "Page 1: Found 35 commits\n", + "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", + "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.78 seconds\n", + "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.90 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.90 seconds.\n", + "Page 2: Found 35 commits\n", + "[LOG] 🕸️ Crawling https://github.com/microsoft/TypeScript/commits/main using 
AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://github.com/microsoft/TypeScript/commits/main successfully!\n", + "[LOG] 🚀 Crawling done for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 2.00 seconds\n", + "[LOG] 🚀 Content extracted for https://github.com/microsoft/TypeScript/commits/main, success: True, time taken: 0.74 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://github.com/microsoft/TypeScript/commits/main, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://github.com/microsoft/TypeScript/commits/main, time taken: 0.75 seconds.\n", + "Page 3: Found 35 commits\n", + "Successfully crawled 105 commits across 3 pages\n" + ] + } + ], + "source": [ + "import re\n", + "from bs4 import BeautifulSoup\n", + "\n", + "async def crawl_typescript_commits():\n", + " first_commit = \"\"\n", + " async def on_execution_started(page):\n", + " nonlocal first_commit\n", + " try:\n", + " while True:\n", + " await page.wait_for_selector('li.Box-sc-g0xbh4-0 h4')\n", + " commit = await page.query_selector('li.Box-sc-g0xbh4-0 h4')\n", + " commit = await commit.evaluate('(element) => element.textContent')\n", + " commit = re.sub(r'\\s+', '', commit)\n", + " if commit and commit != first_commit:\n", + " first_commit = commit\n", + " break\n", + " await asyncio.sleep(0.5)\n", + " except Exception as e:\n", + " print(f\"Warning: New content didn't appear after JavaScript execution: {e}\")\n", + "\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " crawler.crawler_strategy.set_hook('on_execution_started', on_execution_started)\n", + "\n", + " url = \"https://github.com/microsoft/TypeScript/commits/main\"\n", + " session_id = \"typescript_commits_session\"\n", + " all_commits = []\n", + "\n", + " js_next_page = \"\"\"\n", + " const button = document.querySelector('a[data-testid=\"pagination-next-button\"]');\n", + " if (button) button.click();\n", + " \"\"\"\n", + "\n", + " for page in range(3): # Crawl 3 pages\n", + " result = await crawler.arun(\n", + " url=url,\n", + " session_id=session_id,\n", + " css_selector=\"li.Box-sc-g0xbh4-0\",\n", + " js=js_next_page if page > 0 else None,\n", + " bypass_cache=True,\n", + " js_only=page > 0\n", + " )\n", + "\n", + " assert result.success, f\"Failed to crawl page {page + 1}\"\n", + "\n", + " soup = BeautifulSoup(result.cleaned_html, 'html.parser')\n", + " commits = soup.select(\"li\")\n", + " all_commits.extend(commits)\n", + "\n", + " print(f\"Page {page + 1}: Found {len(commits)} commits\")\n", + "\n", + " await crawler.crawler_strategy.kill_session(session_id)\n", + " print(f\"Successfully crawled {len(all_commits)} commits across 3 pages\")\n", + "\n", + "await crawl_typescript_commits()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EJRnYsp6yZQN" + }, + "source": [ + "### Using JsonCssExtractionStrategy for Fast Structured Output" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1ZMqIzB_8SYp" + }, + "source": [ + "The JsonCssExtractionStrategy is a powerful feature of Crawl4AI that allows for precise, structured data extraction from web pages. Here's how it works:\n", + "\n", + "1. You define a schema that describes the pattern of data you're interested in extracting.\n", + "2. The schema includes a base selector that identifies repeating elements on the page.\n", + "3. Within the schema, you define fields, each with its own selector and type.\n", + "4. 
These field selectors are applied within the context of each base selector element.\n", + "5. The strategy supports nested structures, lists within lists, and various data types.\n", + "6. You can even include computed fields for more complex data manipulation.\n", + "\n", + "This approach allows for highly flexible and precise data extraction, transforming semi-structured web content into clean, structured JSON data. It's particularly useful for extracting consistent data patterns from pages like product listings, news articles, or search results.\n", + "\n", + "For more details and advanced usage, check out the full documentation on the Crawl4AI website." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "trCMR2T9yZQN", + "outputId": "718d36f4-cccf-40f4-8d8c-c3ba73524d16" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LOG] 🌤️ Warming up the AsyncWebCrawler\n", + "[LOG] 🌞 AsyncWebCrawler is ready to crawl\n", + "[LOG] 🕸️ Crawling https://www.nbcnews.com/business using AsyncPlaywrightCrawlerStrategy...\n", + "[LOG] ✅ Crawled https://www.nbcnews.com/business successfully!\n", + "[LOG] 🚀 Crawling done for https://www.nbcnews.com/business, success: True, time taken: 7.00 seconds\n", + "[LOG] 🚀 Content extracted for https://www.nbcnews.com/business, success: True, time taken: 0.32 seconds\n", + "[LOG] 🔥 Extracting semantic blocks for https://www.nbcnews.com/business, Strategy: AsyncWebCrawler\n", + "[LOG] 🚀 Extraction done for https://www.nbcnews.com/business, time taken: 0.48 seconds.\n", + "Successfully extracted 11 news teasers\n", + "{\n", + " \"category\": \"Business News\",\n", + " \"headline\": \"NBC ripped up its Olympics playbook for 2024 \\u2014 so far, the new strategy paid off\",\n", + " \"summary\": \"The Olympics have long been key to NBCUniversal. 
Paris marked the 18th Olympic Games broadcast by NBC in the U.S.\",\n", + " \"time\": \"13h ago\",\n", + " \"image\": {\n", + " \"src\": \"https://media-cldnry.s-nbcnews.com/image/upload/t_focal-200x100,f_auto,q_auto:best/rockcms/2024-09/240903-nbc-olympics-ch-1344-c7a486.jpg\",\n", + " \"alt\": \"Mike Tirico.\"\n", + " },\n", + " \"link\": \"https://www.nbcnews.com/business\"\n", + "}\n" + ] + } + ], + "source": [ + "async def extract_news_teasers():\n", + " schema = {\n", + " \"name\": \"News Teaser Extractor\",\n", + " \"baseSelector\": \".wide-tease-item__wrapper\",\n", + " \"fields\": [\n", + " {\n", + " \"name\": \"category\",\n", + " \"selector\": \".unibrow span[data-testid='unibrow-text']\",\n", + " \"type\": \"text\",\n", + " },\n", + " {\n", + " \"name\": \"headline\",\n", + " \"selector\": \".wide-tease-item__headline\",\n", + " \"type\": \"text\",\n", + " },\n", + " {\n", + " \"name\": \"summary\",\n", + " \"selector\": \".wide-tease-item__description\",\n", + " \"type\": \"text\",\n", + " },\n", + " {\n", + " \"name\": \"time\",\n", + " \"selector\": \"[data-testid='wide-tease-date']\",\n", + " \"type\": \"text\",\n", + " },\n", + " {\n", + " \"name\": \"image\",\n", + " \"type\": \"nested\",\n", + " \"selector\": \"picture.teasePicture img\",\n", + " \"fields\": [\n", + " {\"name\": \"src\", \"type\": \"attribute\", \"attribute\": \"src\"},\n", + " {\"name\": \"alt\", \"type\": \"attribute\", \"attribute\": \"alt\"},\n", + " ],\n", + " },\n", + " {\n", + " \"name\": \"link\",\n", + " \"selector\": \"a[href]\",\n", + " \"type\": \"attribute\",\n", + " \"attribute\": \"href\",\n", + " },\n", + " ],\n", + " }\n", + "\n", + " extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True)\n", + "\n", + " async with AsyncWebCrawler(verbose=True) as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " extraction_strategy=extraction_strategy,\n", + " bypass_cache=True,\n", + " )\n", + "\n", + " assert result.success, \"Failed to crawl the page\"\n", + "\n", + " news_teasers = json.loads(result.extracted_content)\n", + " print(f\"Successfully extracted {len(news_teasers)} news teasers\")\n", + " print(json.dumps(news_teasers[0], indent=2))\n", + "\n", + "await extract_news_teasers()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FnyVhJaByZQN" + }, + "source": [ + "## Speed Comparison\n", + "\n", + "Let's compare the speed of Crawl4AI with Firecrawl, a paid service. Note that we can't run Firecrawl in this Colab environment, so we'll simulate its performance based on previously recorded data." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "agDD186f3wig" + }, + "source": [ + "💡 **Note on Speed Comparison:**\n", + "\n", + "The speed test conducted here is running on Google Colab, where the internet speed and performance can vary and may not reflect optimal conditions. When we call Firecrawl's API, we're seeing its best performance, while Crawl4AI's performance is limited by Colab's network speed.\n", + "\n", + "For a more accurate comparison, it's recommended to run these tests on your own servers or computers with a stable and fast internet connection. Despite these limitations, Crawl4AI still demonstrates faster performance in this environment.\n", + "\n", + "If you run these tests locally, you may observe an even more significant speed advantage for Crawl4AI compared to other services." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "F7KwHv8G1LbY" + }, + "outputs": [], + "source": [ + "!pip install firecrawl" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "91813zILyZQN", + "outputId": "663223db-ab89-4976-b233-05ceca62b19b" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Firecrawl (simulated):\n", + "Time taken: 4.38 seconds\n", + "Content length: 41967 characters\n", + "Images found: 49\n", + "\n", + "Crawl4AI (simple crawl):\n", + "Time taken: 4.22 seconds\n", + "Content length: 18221 characters\n", + "Images found: 49\n", + "\n", + "Crawl4AI (with JavaScript execution):\n", + "Time taken: 9.13 seconds\n", + "Content length: 34243 characters\n", + "Images found: 89\n" + ] + } + ], + "source": [ + "import os\n", + "from google.colab import userdata\n", + "os.environ['FIRECRAWL_API_KEY'] = userdata.get('FIRECRAWL_API_KEY')\n", + "import time\n", + "from firecrawl import FirecrawlApp\n", + "\n", + "async def speed_comparison():\n", + " # Simulated Firecrawl performance\n", + " app = FirecrawlApp(api_key=os.environ['FIRECRAWL_API_KEY'])\n", + " start = time.time()\n", + " scrape_status = app.scrape_url(\n", + " 'https://www.nbcnews.com/business',\n", + " params={'formats': ['markdown', 'html']}\n", + " )\n", + " end = time.time()\n", + " print(\"Firecrawl (simulated):\")\n", + " print(f\"Time taken: {end - start:.2f} seconds\")\n", + " print(f\"Content length: {len(scrape_status['markdown'])} characters\")\n", + " print(f\"Images found: {scrape_status['markdown'].count('cldnry.s-nbcnews.com')}\")\n", + " print()\n", + "\n", + " async with AsyncWebCrawler() as crawler:\n", + " # Crawl4AI simple crawl\n", + " start = time.time()\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " word_count_threshold=0,\n", + " bypass_cache=True,\n", + " verbose=False\n", + " )\n", + " end = time.time()\n", + " print(\"Crawl4AI (simple crawl):\")\n", + " print(f\"Time taken: {end - start:.2f} seconds\")\n", + " print(f\"Content length: {len(result.markdown)} characters\")\n", + " print(f\"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}\")\n", + " print()\n", + "\n", + " # Crawl4AI with JavaScript execution\n", + " start = time.time()\n", + " result = await crawler.arun(\n", + " url=\"https://www.nbcnews.com/business\",\n", + " js_code=[\"const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();\"],\n", + " word_count_threshold=0,\n", + " bypass_cache=True,\n", + " verbose=False\n", + " )\n", + " end = time.time()\n", + " print(\"Crawl4AI (with JavaScript execution):\")\n", + " print(f\"Time taken: {end - start:.2f} seconds\")\n", + " print(f\"Content length: {len(result.markdown)} characters\")\n", + " print(f\"Images found: {result.markdown.count('cldnry.s-nbcnews.com')}\")\n", + "\n", + "await speed_comparison()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OBFFYVJIyZQN" + }, + "source": [ + "If you run on a local machine with a proper internet speed:\n", + "- Simple crawl: Crawl4AI is typically over 3-4 times faster than Firecrawl.\n", + "- With JavaScript execution: Even when executing JavaScript to load more content (potentially doubling the number of images found), Crawl4AI is still faster than Firecrawl's simple crawl.\n", + "\n", 
+ "Please note that actual performance may vary depending on network conditions and the specific content being crawled." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "A6_1RK1_yZQO" + }, + "source": [ + "## Conclusion\n", + "\n", + "In this notebook, we've explored the powerful features of Crawl4AI, including:\n", + "\n", + "1. Basic crawling\n", + "2. JavaScript execution and CSS selector usage\n", + "3. Proxy support\n", + "4. Structured data extraction with OpenAI\n", + "5. Advanced multi-page crawling with JavaScript execution\n", + "6. Fast structured output using JsonCssExtractionStrategy\n", + "7. Speed comparison with other services\n", + "\n", + "Crawl4AI offers a fast, flexible, and powerful solution for web crawling and data extraction tasks. Its asynchronous architecture and advanced features make it suitable for a wide range of applications, from simple web scraping to complex, multi-page data extraction scenarios.\n", + "\n", + "For more information and advanced usage, please visit the [Crawl4AI documentation](https://crawl4ai.com/mkdocs/).\n", + "\n", + "Happy crawling!" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/docs/examples/v0.3.74.overview.py b/docs/examples/v0.3.74.overview.py new file mode 100644 index 0000000..362ae8f --- /dev/null +++ b/docs/examples/v0.3.74.overview.py @@ -0,0 +1,277 @@ +import os, sys +# append the parent directory to the sys.path +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(parent_dir) +parent_parent_dir = os.path.dirname(parent_dir) +sys.path.append(parent_parent_dir) +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) +__data__ = os.path.join(__location__, "__data") +import asyncio +from pathlib import Path +import aiohttp +import json +from crawl4ai import AsyncWebCrawler, CacheMode +from crawl4ai.content_filter_strategy import BM25ContentFilter + +# 1. 
File Download Processing Example +async def download_example(): + """Example of downloading files from Python.org""" + # downloads_path = os.path.join(os.getcwd(), "downloads") + downloads_path = os.path.join(Path.home(), ".crawl4ai", "downloads") + os.makedirs(downloads_path, exist_ok=True) + + print(f"Downloads will be saved to: {downloads_path}") + + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=downloads_path, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + // Find and click the first Windows installer link + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) { + console.log('Found download link:', downloadLink.href); + downloadLink.click(); + } else { + console.log('No .exe download link found'); + } + """, + delay_before_return_html=1, # Wait 5 seconds to ensure download starts + cache_mode=CacheMode.BYPASS + ) + + if result.downloaded_files: + print("\nDownload successful!") + print("Downloaded files:") + for file_path in result.downloaded_files: + print(f"- {file_path}") + print(f" File size: {os.path.getsize(file_path) / (1024*1024):.2f} MB") + else: + print("\nNo files were downloaded") + +# 2. Local File and Raw HTML Processing Example +async def local_and_raw_html_example(): + """Example of processing local files and raw HTML""" + # Create a sample HTML file + sample_file = os.path.join(__data__, "sample.html") + with open(sample_file, "w") as f: + f.write(""" + +

+            <html>
+            <body>
+                <h1>Test Content</h1>
+                <p>This is a test paragraph.</p>
+            </body>
+            </html>

    + + """) + + async with AsyncWebCrawler(verbose=True) as crawler: + # Process local file + local_result = await crawler.arun( + url=f"file://{os.path.abspath(sample_file)}" + ) + + # Process raw HTML + raw_html = """ + +

+        <html>
+        <body>
+            <h1>Raw HTML Test</h1>
+            <p>This is a test of raw HTML processing.</p>
+        </body>
+        </html>

    + + """ + raw_result = await crawler.arun( + url=f"raw:{raw_html}" + ) + + # Clean up + os.remove(sample_file) + + print("Local file content:", local_result.markdown) + print("\nRaw HTML content:", raw_result.markdown) + +# 3. Enhanced Markdown Generation Example +async def markdown_generation_example(): + """Example of enhanced markdown generation with citations and LLM-friendly features""" + async with AsyncWebCrawler(verbose=True) as crawler: + # Create a content filter (optional) + content_filter = BM25ContentFilter( + # user_query="History and cultivation", + bm25_threshold=1.0 + ) + + result = await crawler.arun( + url="https://en.wikipedia.org/wiki/Apple", + css_selector="main div#bodyContent", + content_filter=content_filter, + cache_mode=CacheMode.BYPASS + ) + + from crawl4ai import AsyncWebCrawler + from crawl4ai.content_filter_strategy import BM25ContentFilter + + result = await crawler.arun( + url="https://en.wikipedia.org/wiki/Apple", + css_selector="main div#bodyContent", + content_filter=BM25ContentFilter() + ) + print(result.markdown_v2.fit_markdown) + + print("\nMarkdown Generation Results:") + print(f"1. Original markdown length: {len(result.markdown)}") + print(f"2. New markdown versions (markdown_v2):") + print(f" - Raw markdown length: {len(result.markdown_v2.raw_markdown)}") + print(f" - Citations markdown length: {len(result.markdown_v2.markdown_with_citations)}") + print(f" - References section length: {len(result.markdown_v2.references_markdown)}") + if result.markdown_v2.fit_markdown: + print(f" - Filtered markdown length: {len(result.markdown_v2.fit_markdown)}") + + # Save examples to files + output_dir = os.path.join(__data__, "markdown_examples") + os.makedirs(output_dir, exist_ok=True) + + # Save different versions + with open(os.path.join(output_dir, "1_raw_markdown.md"), "w") as f: + f.write(result.markdown_v2.raw_markdown) + + with open(os.path.join(output_dir, "2_citations_markdown.md"), "w") as f: + f.write(result.markdown_v2.markdown_with_citations) + + with open(os.path.join(output_dir, "3_references.md"), "w") as f: + f.write(result.markdown_v2.references_markdown) + + if result.markdown_v2.fit_markdown: + with open(os.path.join(output_dir, "4_filtered_markdown.md"), "w") as f: + f.write(result.markdown_v2.fit_markdown) + + print(f"\nMarkdown examples saved to: {output_dir}") + + # Show a sample of citations and references + print("\nSample of markdown with citations:") + print(result.markdown_v2.markdown_with_citations[:500] + "...\n") + print("Sample of references:") + print('\n'.join(result.markdown_v2.references_markdown.split('\n')[:10]) + "...") + +# 4. 
Browser Management Example +async def browser_management_example(): + """Example of using enhanced browser management features""" + # Use the specified user directory path + user_data_dir = os.path.join(Path.home(), ".crawl4ai", "browser_profile") + os.makedirs(user_data_dir, exist_ok=True) + + print(f"Browser profile will be saved to: {user_data_dir}") + + async with AsyncWebCrawler( + use_managed_browser=True, + user_data_dir=user_data_dir, + headless=False, + verbose=True + ) as crawler: + + result = await crawler.arun( + url="https://crawl4ai.com", + # session_id="persistent_session_1", + cache_mode=CacheMode.BYPASS + ) + # Use GitHub as an example - it's a good test for browser management + # because it requires proper browser handling + result = await crawler.arun( + url="https://github.com/trending", + # session_id="persistent_session_1", + cache_mode=CacheMode.BYPASS + ) + + print("\nBrowser session result:", result.success) + if result.success: + print("Page title:", result.metadata.get('title', 'No title found')) + +# 5. API Usage Example +async def api_example(): + """Example of using the new API endpoints""" + api_token = os.getenv('CRAWL4AI_API_TOKEN') or "test_api_code" + headers = {'Authorization': f'Bearer {api_token}'} + async with aiohttp.ClientSession() as session: + # Submit crawl job + crawl_request = { + "urls": ["https://news.ycombinator.com"], # Hacker News as an example + "extraction_config": { + "type": "json_css", + "params": { + "schema": { + "name": "Hacker News Articles", + "baseSelector": ".athing", + "fields": [ + { + "name": "title", + "selector": ".title a", + "type": "text" + }, + { + "name": "score", + "selector": ".score", + "type": "text" + }, + { + "name": "url", + "selector": ".title a", + "type": "attribute", + "attribute": "href" + } + ] + } + } + }, + "crawler_params": { + "headless": True, + # "use_managed_browser": True + }, + "cache_mode": "bypass", + # "screenshot": True, + # "magic": True + } + + async with session.post( + "http://localhost:11235/crawl", + json=crawl_request, + headers=headers + ) as response: + task_data = await response.json() + task_id = task_data["task_id"] + + # Check task status + while True: + async with session.get( + f"http://localhost:11235/task/{task_id}", + headers=headers + ) as status_response: + result = await status_response.json() + print(f"Task status: {result['status']}") + + if result["status"] == "completed": + print("Task completed!") + print("Results:") + news = json.loads(result["results"][0]['extracted_content']) + print(json.dumps(news[:4], indent=2)) + break + else: + await asyncio.sleep(1) + +# Main execution +async def main(): + # print("Running Crawl4AI feature examples...") + + # print("\n1. Running Download Example:") + # await download_example() + + # print("\n2. Running Markdown Generation Example:") + # await markdown_generation_example() + + # # print("\n3. Running Local and Raw HTML Example:") + # await local_and_raw_html_example() + + # # print("\n4. Running Browser Management Example:") + await browser_management_example() + + # print("\n5. 
Running API Example:") + await api_example() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/docs/extraction_strategies.json b/docs/extraction_strategies.json deleted file mode 100644 index 570e1e3..0000000 --- a/docs/extraction_strategies.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "NoExtractionStrategy": "### NoExtractionStrategy\n\n`NoExtractionStrategy` is a basic extraction strategy that returns the entire HTML content without any modification. It is useful for cases where no specific extraction is required. Only clean html, and amrkdown.\n\n#### Constructor Parameters:\nNone.\n\n#### Example usage:\n```python\nextractor = NoExtractionStrategy()\nextracted_content = extractor.extract(url, html)\n```", - - "LLMExtractionStrategy": "### LLMExtractionStrategy\n\n`LLMExtractionStrategy` uses a Language Model (LLM) to extract meaningful blocks or chunks from the given HTML content. This strategy leverages an external provider for language model completions.\n\n#### Constructor Parameters:\n- `provider` (str, optional): The provider to use for the language model completions. Default is `DEFAULT_PROVIDER` (e.g., openai/gpt-4).\n- `api_token` (str, optional): The API token for the provider. If not provided, it will try to load from the environment variable `OPENAI_API_KEY`.\n- `instruction` (str, optional): An instruction to guide the LLM on how to perform the extraction. This allows users to specify the type of data they are interested in or set the tone of the response. Default is `None`.\n\n#### Example usage:\n```python\nextractor = LLMExtractionStrategy(provider='openai', api_token='your_api_token', instruction='Extract only news about AI.')\nextracted_content = extractor.extract(url, html)\n```\n\nBy providing clear instructions, users can tailor the extraction process to their specific needs, enhancing the relevance and utility of the extracted content.", - - "CosineStrategy": "### CosineStrategy\n\n`CosineStrategy` uses hierarchical clustering based on cosine similarity to extract clusters of text from the given HTML content. This strategy is suitable for identifying related content sections.\n\n#### Constructor Parameters:\n- `semantic_filter` (str, optional): A string containing keywords for filtering relevant documents before clustering. If provided, documents are filtered based on their cosine similarity to the keyword filter embedding. Default is `None`.\n- `word_count_threshold` (int, optional): Minimum number of words per cluster. Default is `20`.\n- `max_dist` (float, optional): The maximum cophenetic distance on the dendrogram to form clusters. Default is `0.2`.\n- `linkage_method` (str, optional): The linkage method for hierarchical clustering. Default is `'ward'`.\n- `top_k` (int, optional): Number of top categories to extract. Default is `3`.\n- `model_name` (str, optional): The model name for embedding generation. 
Default is `'BAAI/bge-small-en-v1.5'`.\n\n#### Example usage:\n```python\nextractor = CosineStrategy(semantic_filter='artificial intelligence', word_count_threshold=10, max_dist=0.2, linkage_method='ward', top_k=3, model_name='BAAI/bge-small-en-v1.5')\nextracted_content = extractor.extract(url, html)\n```\n\n#### Cosine Similarity Filtering\n\nWhen a `semantic_filter` is provided, the `CosineStrategy` applies an embedding-based filtering process to select relevant documents before performing hierarchical clustering.", - - "TopicExtractionStrategy": "### TopicExtractionStrategy\n\n`TopicExtractionStrategy` uses the TextTiling algorithm to segment the HTML content into topics and extracts keywords for each segment. This strategy is useful for identifying and summarizing thematic content.\n\n#### Constructor Parameters:\n- `num_keywords` (int, optional): Number of keywords to represent each topic segment. Default is `3`.\n\n#### Example usage:\n```python\nextractor = TopicExtractionStrategy(num_keywords=3)\nextracted_content = extractor.extract(url, html)\n```" - } - \ No newline at end of file diff --git a/docs/md _sync/api/core_classes_and_functions.md b/docs/md _sync/api/core_classes_and_functions.md deleted file mode 100644 index 198fd42..0000000 --- a/docs/md _sync/api/core_classes_and_functions.md +++ /dev/null @@ -1,141 +0,0 @@ -# Core Classes and Functions - -## Overview - -In this section, we will delve into the core classes and functions that make up the Crawl4AI library. This includes the `WebCrawler` class, various `CrawlerStrategy` classes, `ChunkingStrategy` classes, and `ExtractionStrategy` classes. Understanding these core components will help you leverage the full power of Crawl4AI for your web crawling and data extraction needs. - -## WebCrawler Class - -The `WebCrawler` class is the main class you'll interact with. It provides the interface for crawling web pages and extracting data. - -### Initialization - -```python -from crawl4ai import WebCrawler - -# Create an instance of WebCrawler -crawler = WebCrawler() -``` - -### Methods - -- **`warmup()`**: Prepares the crawler for use, such as loading necessary models. -- **`run(url: str, **kwargs)`**: Runs the crawler on the specified URL with optional parameters for customization. - -```python -crawler.warmup() -result = crawler.run(url="https://www.nbcnews.com/business") -print(result) -``` - -## CrawlerStrategy Classes - -The `CrawlerStrategy` classes define how the web crawling is executed. The base class is `CrawlerStrategy`, which is extended by specific implementations like `LocalSeleniumCrawlerStrategy`. - -### CrawlerStrategy Base Class - -An abstract base class that defines the interface for different crawler strategies. - -```python -from abc import ABC, abstractmethod - -class CrawlerStrategy(ABC): - @abstractmethod - def crawl(self, url: str, **kwargs) -> str: - pass - - @abstractmethod - def take_screenshot(self, save_path: str): - pass - - @abstractmethod - def update_user_agent(self, user_agent: str): - pass - - @abstractmethod - def set_hook(self, hook_type: str, hook: Callable): - pass -``` - -### LocalSeleniumCrawlerStrategy Class - -A concrete implementation of `CrawlerStrategy` that uses Selenium to crawl web pages. - -#### Initialization - -```python -from crawl4ai.crawler_strategy import LocalSeleniumCrawlerStrategy - -strategy = LocalSeleniumCrawlerStrategy(js_code=["console.log('Hello, world!');"]) -``` - -#### Methods - -- **`crawl(url: str, **kwargs)`**: Crawls the specified URL. 
-- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -```python -result = strategy.crawl("https://www.example.com") -strategy.take_screenshot("screenshot.png") -strategy.update_user_agent("Mozilla/5.0") -strategy.set_hook("before_get_url", lambda: print("About to get URL")) -``` - -## ChunkingStrategy Classes - -The `ChunkingStrategy` classes define how the text from a web page is divided into chunks. Here are a few examples: - -### RegexChunking Class - -Splits text using regular expressions. - -```python -from crawl4ai.chunking_strategy import RegexChunking - -chunker = RegexChunking(patterns=[r'\n\n']) -chunks = chunker.chunk("This is a sample text. It will be split into chunks.") -``` - -### NlpSentenceChunking Class - -Uses NLP to split text into sentences. - -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -chunker = NlpSentenceChunking() -chunks = chunker.chunk("This is a sample text. It will be split into sentences.") -``` - -## ExtractionStrategy Classes - -The `ExtractionStrategy` classes define how meaningful content is extracted from the chunks. Here are a few examples: - -### CosineStrategy Class - -Clusters text chunks based on cosine similarity. - -```python -from crawl4ai.extraction_strategy import CosineStrategy - -extractor = CosineStrategy(semantic_filter="finance", word_count_threshold=10) -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### LLMExtractionStrategy Class - -Uses a Language Model to extract meaningful blocks from HTML. - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy - -extractor = LLMExtractionStrategy(provider='openai', api_token='your_api_token', instruction='Extract only news about AI.') -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -## Conclusion - -By understanding these core classes and functions, you can customize and extend Crawl4AI to suit your specific web crawling and data extraction needs. Happy crawling! 🕷️🤖 - diff --git a/docs/md _sync/api/detailed_api_documentation.md b/docs/md _sync/api/detailed_api_documentation.md deleted file mode 100644 index aaf0e34..0000000 --- a/docs/md _sync/api/detailed_api_documentation.md +++ /dev/null @@ -1,338 +0,0 @@ -# Detailed API Documentation - -## Overview - -This section provides comprehensive documentation for the Crawl4AI API, covering all classes, methods, and their parameters. This guide will help you understand how to utilize the API to its full potential, enabling efficient web crawling and data extraction. - -## WebCrawler Class - -The `WebCrawler` class is the primary interface for crawling web pages and extracting data. - -### Initialization - -```python -from crawl4ai import WebCrawler - -crawler = WebCrawler() -``` - -### Methods - -#### `warmup()` - -Prepares the crawler for use, such as loading necessary models. - -```python -crawler.warmup() -``` - -#### `run(url: str, **kwargs) -> CrawlResult` - -Crawls the specified URL and returns the result. - -- **Parameters:** - - `url` (str): The URL to crawl. - - `**kwargs`: Additional parameters for customization. - -- **Returns:** - - `CrawlResult`: An object containing the crawl result. 
- -- **Example:** - -```python -result = crawler.run(url="https://www.nbcnews.com/business") -print(result) -``` - -### CrawlResult Class - -Represents the result of a crawl operation. - -- **Attributes:** - - `url` (str): The URL of the crawled page. - - `html` (str): The raw HTML of the page. - - `success` (bool): Whether the crawl was successful. - - `cleaned_html` (Optional[str]): The cleaned HTML. - - `media` (Dict[str, List[Dict]]): Media tags in the page (images, audio, video). - - `links` (Dict[str, List[Dict]]): Links in the page (external, internal). - - `screenshot` (Optional[str]): Base64 encoded screenshot. - - `markdown` (Optional[str]): Extracted content in Markdown format. - - `extracted_content` (Optional[str]): Extracted meaningful content. - - `metadata` (Optional[dict]): Metadata from the page. - - `error_message` (Optional[str]): Error message if any. - -## CrawlerStrategy Classes - -The `CrawlerStrategy` classes define how the web crawling is executed. - -### CrawlerStrategy Base Class - -An abstract base class for different crawler strategies. - -#### Methods - -- **`crawl(url: str, **kwargs) -> str`**: Crawls the specified URL. -- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -### LocalSeleniumCrawlerStrategy Class - -Uses Selenium to crawl web pages. - -#### Initialization - -```python -from crawl4ai.crawler_strategy import LocalSeleniumCrawlerStrategy - -strategy = LocalSeleniumCrawlerStrategy(js_code=["console.log('Hello, world!');"]) -``` - -#### Methods - -- **`crawl(url: str, **kwargs)`**: Crawls the specified URL. -- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -#### Example - -```python -result = strategy.crawl("https://www.example.com") -strategy.take_screenshot("screenshot.png") -strategy.update_user_agent("Mozilla/5.0") -strategy.set_hook("before_get_url", lambda: print("About to get URL")) -``` - -## ChunkingStrategy Classes - -The `ChunkingStrategy` classes define how the text from a web page is divided into chunks. - -### RegexChunking Class - -Splits text using regular expressions. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import RegexChunking - -chunker = RegexChunking(patterns=[r'\n\n']) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into chunks. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into chunks.") -``` - -### NlpSentenceChunking Class - -Uses NLP to split text into sentences. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -chunker = NlpSentenceChunking() -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into sentences. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into sentences.") -``` - -### TopicSegmentationChunking Class - -Uses the TextTiling algorithm to segment text into topics. 
- -#### Initialization - -```python -from crawl4ai.chunking_strategy import TopicSegmentationChunking - -chunker = TopicSegmentationChunking(num_keywords=3) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into topic-based segments. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into topic-based segments.") -``` - -### FixedLengthWordChunking Class - -Splits text into chunks of fixed length based on the number of words. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import FixedLengthWordChunking - -chunker = FixedLengthWordChunking(chunk_size=100) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into fixed-length word chunks. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into fixed-length word chunks.") -``` - -### SlidingWindowChunking Class - -Uses a sliding window approach to chunk text. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import SlidingWindowChunking - -chunker = SlidingWindowChunking(window_size=100, step=50) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text using a sliding window approach. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split using a sliding window approach.") -``` - -## ExtractionStrategy Classes - -The `ExtractionStrategy` classes define how meaningful content is extracted from the chunks. - -### NoExtractionStrategy Class - -Returns the entire HTML content without any modification. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import NoExtractionStrategy - -extractor = NoExtractionStrategy() -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Returns the HTML content. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### LLMExtractionStrategy Class - -Uses a Language Model to extract meaningful blocks from HTML. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy - -extractor = LLMExtractionStrategy(provider='openai', api_token='your_api_token', instruction='Extract only news about AI.') -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts meaningful content using the LLM. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### CosineStrategy Class - -Clusters text chunks based on cosine similarity. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import CosineStrategy - -extractor = CosineStrategy(semantic_filter="finance", word_count_threshold=10) -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts clusters of text based on cosine similarity. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### TopicExtractionStrategy Class - -Uses the TextTiling algorithm to segment HTML content into topics and extract keywords. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import TopicExtractionStrategy - -extractor = TopicExtractionStrategy(num_keywords=3) -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts topic-based segments and keywords. 
- -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -## Parameters - -Here are the common parameters used across various classes and methods: - -- **`url`** (str): The URL to crawl. -- **`html`** (str): The HTML content of the page. -- **`user_agent`** (str): The user agent for the HTTP requests. -- **`patterns`** (list): A list of regular expression patterns for chunking. -- **`num_keywords`** (int): Number of keywords for topic extraction. -- **`chunk_size`** (int): Number of words in each chunk. -- **`window_size`** (int): Number of words in the sliding window. -- **`step`** (int): Step size for the sliding window. -- **`semantic_filter`** (str): Keywords for filtering relevant documents. -- **`word_count_threshold`** (int): Minimum number of words per cluster. -- **`max_dist`** (float): Maximum cophenetic distance for clustering. -- **`linkage_method`** (str): Linkage method for hierarchical clustering. -- **`top_k`** (int): Number of top categories to extract. -- **`provider`** ( - -str): Provider for language model completions. -- **`api_token`** (str): API token for the provider. -- **`instruction`** (str): Instruction to guide the LLM extraction. - -## Conclusion - -This detailed API documentation provides a thorough understanding of the classes, methods, and parameters in the Crawl4AI library. With this knowledge, you can effectively use the API to perform advanced web crawling and data extraction tasks. \ No newline at end of file diff --git a/docs/md _sync/changelog.md b/docs/md _sync/changelog.md deleted file mode 100644 index 03e490f..0000000 --- a/docs/md _sync/changelog.md +++ /dev/null @@ -1,102 +0,0 @@ -# Changelog - -## [v0.2.77] - 2024-08-04 - -Significant improvements in text processing and performance: - -- 🚀 **Dependency reduction**: Removed dependency on spaCy model for text chunk labeling in cosine extraction strategy. -- 🤖 **Transformer upgrade**: Implemented text sequence classification using a transformer model for labeling text chunks. -- ⚡ **Performance enhancement**: Improved model loading speed due to removal of spaCy dependency. -- 🔧 **Future-proofing**: Laid groundwork for potential complete removal of spaCy dependency in future versions. - -These changes address issue #68 and provide a foundation for faster, more efficient text processing in Crawl4AI. - -## [v0.2.76] - 2024-08-02 - -Major improvements in functionality, performance, and cross-platform compatibility! 🚀 - -- 🐳 **Docker enhancements**: Significantly improved Dockerfile for easy installation on Linux, Mac, and Windows. -- 🌐 **Official Docker Hub image**: Launched our first official image on Docker Hub for streamlined deployment. -- 🔧 **Selenium upgrade**: Removed dependency on ChromeDriver, now using Selenium's built-in capabilities for better compatibility. -- 🖼️ **Image description**: Implemented ability to generate textual descriptions for extracted images from web pages. -- ⚡ **Performance boost**: Various improvements to enhance overall speed and performance. - -A big shoutout to our amazing community contributors: -- [@aravindkarnam](https://github.com/aravindkarnam) for developing the textual description extraction feature. -- [@FractalMind](https://github.com/FractalMind) for creating the first official Docker Hub image and fixing Dockerfile errors. -- [@ketonkss4](https://github.com/ketonkss4) for identifying Selenium's new capabilities, helping us reduce dependencies. - -Your contributions are driving Crawl4AI forward! 
🙌 - -## [v0.2.75] - 2024-07-19 - -Minor improvements for a more maintainable codebase: - -- 🔄 Fixed typos in `chunking_strategy.py` and `crawler_strategy.py` to improve code readability -- 🔄 Removed `.test_pads/` directory from `.gitignore` to keep our repository clean and organized - -These changes may seem small, but they contribute to a more stable and sustainable codebase. By fixing typos and updating our `.gitignore` settings, we're ensuring that our code is easier to maintain and scale in the long run. - - -## v0.2.74 - 2024-07-08 -A slew of exciting updates to improve the crawler's stability and robustness! 🎉 - -- 💻 **UTF encoding fix**: Resolved the Windows \"charmap\" error by adding UTF encoding. -- 🛡️ **Error handling**: Implemented MaxRetryError exception handling in LocalSeleniumCrawlerStrategy. -- 🧹 **Input sanitization**: Improved input sanitization and handled encoding issues in LLMExtractionStrategy. -- 🚮 **Database cleanup**: Removed existing database file and initialized a new one. - -## [v0.2.73] - 2024-07-03 - -💡 In this release, we've bumped the version to v0.2.73 and refreshed our documentation to ensure you have the best experience with our project. - -* Supporting website need "with-head" mode to crawl the website with head. -* Fixing the installation issues for setup.py and dockerfile. -* Resolve multiple issues. - -## [v0.2.72] - 2024-06-30 - -This release brings exciting updates and improvements to our project! 🎉 - -* 📚 **Documentation Updates**: Our documentation has been revamped to reflect the latest changes and additions. -* 🚀 **New Modes in setup.py**: We've added support for three new modes in setup.py: default, torch, and transformers. This enhances the project's flexibility and usability. -* 🐳 **Docker File Updates**: The Docker file has been updated to ensure seamless compatibility with the new modes and improvements. -* 🕷️ **Temporary Solution for Headless Crawling**: We've implemented a temporary solution to overcome issues with crawling websites in headless mode. - -These changes aim to improve the overall user experience, provide more flexibility, and enhance the project's performance. We're thrilled to share these updates with you and look forward to continuing to evolve and improve our project! - -## [0.2.71] - 2024-06-26 - -**Improved Error Handling and Performance** 🚧 - -* 🚫 Refactored `crawler_strategy.py` to handle exceptions and provide better error messages, making it more robust and reliable. -* 💻 Optimized the `get_content_of_website_optimized` function in `utils.py` for improved performance, reducing potential bottlenecks. -* 💻 Updated `utils.py` with the latest changes, ensuring consistency and accuracy. -* 🚫 Migrated to `ChromeDriverManager` to resolve Chrome driver download issues, providing a smoother user experience. - -These changes focus on refining the existing codebase, resulting in a more stable, efficient, and user-friendly experience. With these improvements, you can expect fewer errors and better performance in the crawler strategy and utility functions. - -## [0.2.71] - 2024-06-25 -### Fixed -- Speed up twice the extraction function. - -## [0.2.6] - 2024-06-22 -### Fixed -- Fix issue #19: Update Dockerfile to ensure compatibility across multiple platforms. - -## [0.2.5] - 2024-06-18 -### Added -- Added five important hooks to the crawler: - - on_driver_created: Called when the driver is ready for initializations. - - before_get_url: Called right before Selenium fetches the URL. 
- - after_get_url: Called after Selenium fetches the URL. - - before_return_html: Called when the data is parsed and ready. - - on_user_agent_updated: Called when the user changes the user_agent, causing the driver to reinitialize. -- Added an example in `quickstart.py` in the example folder under the docs. -- Enhancement issue #24: Replaced inline HTML tags (e.g., DEL, INS, SUB, ABBR) with textual format for better context handling in LLM. -- Maintaining the semantic context of inline tags (e.g., abbreviation, DEL, INS) for improved LLM-friendliness. -- Updated Dockerfile to ensure compatibility across multiple platforms (Hopefully!). - -## [0.2.4] - 2024-06-17 -### Fixed -- Fix issue #22: Use MD5 hash for caching HTML files to handle long URLs diff --git a/docs/md _sync/contact.md b/docs/md _sync/contact.md deleted file mode 100644 index 85faa21..0000000 --- a/docs/md _sync/contact.md +++ /dev/null @@ -1,25 +0,0 @@ -# Contact -If you have any questions, suggestions, or feedback, please feel free to reach out to us: - -- GitHub: [unclecode](https://github.com/unclecode) -- Twitter: [@unclecode](https://twitter.com/unclecode) -- Website: [crawl4ai.com](https://crawl4ai.com) - - -## Contributing 🤝 - -We welcome contributions from the open-source community to help improve Crawl4AI and make it even more valuable for AI enthusiasts and developers. To contribute, please follow these steps: - -1. Fork the repository. -2. Create a new branch for your feature or bug fix. -3. Make your changes and commit them with descriptive messages. -4. Push your changes to your forked repository. -5. Submit a pull request to the main repository. - -For more information on contributing, please see our [contribution guidelines](https://github.com/unclecode/crawl4ai/blob/main/CONTRIBUTING.md). - -## License 📄 - -Crawl4AI is released under the [Apache 2.0 License](https://github.com/unclecode/crawl4ai/blob/main/LICENSE). - -Let's work together to make the web more accessible and useful for AI applications! 💪🌐🤖 diff --git a/docs/md _sync/demo.md b/docs/md _sync/demo.md deleted file mode 100644 index 4ca6c75..0000000 --- a/docs/md _sync/demo.md +++ /dev/null @@ -1,231 +0,0 @@ -# Interactive Demo for Crowler -
[Interactive demo page markup: URL and options form, loading indicator, and result tabs for Markdown, Cleaned HTML, Media, Extracted Content, Screenshot, and Python Code.]
    \ No newline at end of file diff --git a/docs/md _sync/examples/hooks_auth.md b/docs/md _sync/examples/hooks_auth.md deleted file mode 100644 index 2b4c270..0000000 --- a/docs/md _sync/examples/hooks_auth.md +++ /dev/null @@ -1,100 +0,0 @@ -# Hooks & Auth - -Crawl4AI allows you to customize the behavior of the web crawler using hooks. Hooks are functions that are called at specific points in the crawling process, allowing you to modify the crawler's behavior or perform additional actions. This example demonstrates how to use various hooks to customize the crawling process. - -## Example: Using Crawler Hooks - -Let's see how we can customize the crawler using hooks! In this example, we'll: - -1. Maximize the browser window and log in to a website when the driver is created. -2. Add a custom header before fetching the URL. -3. Log the current URL after fetching it. -4. Log the length of the HTML before returning it. - -### Hook Definitions - -```python -from crawl4ai.web_crawler import WebCrawler -from crawl4ai.crawler_strategy import * - -def on_driver_created(driver): - print("[HOOK] on_driver_created") - # Example customization: maximize the window - driver.maximize_window() - - # Example customization: logging in to a hypothetical website - driver.get('https://example.com/login') - - from selenium.webdriver.support.ui import WebDriverWait - from selenium.webdriver.common.by import By - from selenium.webdriver.support import expected_conditions as EC - - WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.NAME, 'username')) - ) - driver.find_element(By.NAME, 'username').send_keys('testuser') - driver.find_element(By.NAME, 'password').send_keys('password123') - driver.find_element(By.NAME, 'login').click() - WebDriverWait(driver, 10).until( - EC.presence_of_element_located((By.ID, 'welcome')) - ) - # Add a custom cookie - driver.add_cookie({'name': 'test_cookie', 'value': 'cookie_value'}) - return driver - - -def before_get_url(driver): - print("[HOOK] before_get_url") - # Example customization: add a custom header - # Enable Network domain for sending headers - driver.execute_cdp_cmd('Network.enable', {}) - # Add a custom header - driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': {'X-Test-Header': 'test'}}) - return driver - -def after_get_url(driver): - print("[HOOK] after_get_url") - # Example customization: log the URL - print(driver.current_url) - return driver - -def before_return_html(driver, html): - print("[HOOK] before_return_html") - # Example customization: log the HTML - print(len(html)) - return driver -``` - -### Using the Hooks with the WebCrawler - -```python -print("\n🔗 [bold cyan]Using Crawler Hooks: Let's see how we can customize the crawler using hooks![/bold cyan]", True) -crawler_strategy = LocalSeleniumCrawlerStrategy(verbose=True) -crawler_strategy.set_hook('on_driver_created', on_driver_created) -crawler_strategy.set_hook('before_get_url', before_get_url) -crawler_strategy.set_hook('after_get_url', after_get_url) -crawler_strategy.set_hook('before_return_html', before_return_html) -crawler = WebCrawler(verbose=True, crawler_strategy=crawler_strategy) -crawler.warmup() - -result = crawler.run(url="https://example.com") - -print("[LOG] 📦 [bold yellow]Crawler Hooks result:[/bold yellow]") -print(result) -``` - -### Explanation - -- `on_driver_created`: This hook is called when the Selenium driver is created. In this example, it maximizes the window, logs in to a website, and adds a custom cookie. 
-- `before_get_url`: This hook is called right before Selenium fetches the URL. In this example, it adds a custom HTTP header. -- `after_get_url`: This hook is called after Selenium fetches the URL. In this example, it logs the current URL. -- `before_return_html`: This hook is called before returning the HTML content. In this example, it logs the length of the HTML content. - -### Additional Ideas - -- **Add custom headers to requests**: You can add custom headers to the requests using the `before_get_url` hook. -- **Perform safety checks**: Use the hooks to perform safety checks before the crawling process starts. -- **Modify the HTML content**: Use the `before_return_html` hook to modify the HTML content before it is returned. -- **Log additional information**: Use the hooks to log additional information for debugging or monitoring purposes. - -By using these hooks, you can customize the behavior of the crawler to suit your specific needs. diff --git a/docs/md _sync/examples/index.md b/docs/md _sync/examples/index.md deleted file mode 100644 index 14380e3..0000000 --- a/docs/md _sync/examples/index.md +++ /dev/null @@ -1,29 +0,0 @@ -# Examples - -Welcome to the examples section of Crawl4AI documentation! In this section, you will find practical examples demonstrating how to use Crawl4AI for various web crawling and data extraction tasks. Each example is designed to showcase different features and capabilities of the library. - -## Examples Index - -### [LLM Extraction](llm_extraction.md) - -This example demonstrates how to use Crawl4AI to extract information using Large Language Models (LLMs). You will learn how to configure the `LLMExtractionStrategy` to get structured data from web pages. - -### [JS Execution & CSS Filtering](js_execution_css_filtering.md) - -Learn how to execute custom JavaScript code and filter data using CSS selectors. This example shows how to perform complex web interactions and extract specific content from web pages. - -### [Hooks & Auth](hooks_auth.md) - -This example covers the use of custom hooks for authentication and other pre-crawling tasks. You will see how to set up hooks to modify headers, authenticate sessions, and perform other preparatory actions before crawling. - -### [Summarization](summarization.md) - -Discover how to use Crawl4AI to summarize web page content. This example demonstrates the summarization capabilities of the library, helping you extract concise information from lengthy web pages. - -### [Research Assistant](research_assistant.md) - -In this example, Crawl4AI is used as a research assistant to gather and organize information from multiple sources. You will learn how to use various extraction and chunking strategies to compile a comprehensive report. - ---- - -Each example includes detailed explanations and code snippets to help you understand and implement the features in your projects. Click on the links to explore each example and start making the most of Crawl4AI! diff --git a/docs/md _sync/examples/js_execution_css_filtering.md b/docs/md _sync/examples/js_execution_css_filtering.md deleted file mode 100644 index 0592f09..0000000 --- a/docs/md _sync/examples/js_execution_css_filtering.md +++ /dev/null @@ -1,44 +0,0 @@ -# JS Execution & CSS Filtering - -In this example, we'll demonstrate how to use Crawl4AI to execute JavaScript, filter data with CSS selectors, and use a cosine similarity strategy to extract relevant content. 
This approach is particularly useful when you need to interact with dynamic content on web pages, such as clicking "Load More" buttons. - -## Example: Extracting Structured Data - -```python -# Import necessary modules -from crawl4ai import WebCrawler -from crawl4ai.chunking_strategy import * -from crawl4ai.extraction_strategy import * -from crawl4ai.crawler_strategy import * - -# Define the JavaScript code to click the "Load More" button -js_code = [""" -const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); -loadMoreButton && loadMoreButton.click(); -"""] - -crawler = WebCrawler(verbose=True) -crawler.warmup() -# Run the crawler with keyword filtering and CSS selector -result = crawler.run( - url="https://www.nbcnews.com/business", - js=js_code, - css_selector="p", - extraction_strategy=CosineStrategy( - semantic_filter="technology", - ), -) - -# Display the extracted result -print(result) -``` - -### Explanation - -1. **JavaScript Execution**: The `js_code` variable contains JavaScript code that simulates clicking a "Load More" button. This is useful for loading additional content dynamically. -2. **CSS Selector**: The `css_selector="p"` parameter ensures that only paragraph (`
<p>
    `) tags are extracted from the web page. -3. **Extraction Strategy**: The `CosineStrategy` is used with a semantic filter for "technology" to extract relevant content based on cosine similarity. - -## Try It Yourself - -This example demonstrates the power and flexibility of Crawl4AI in handling complex web interactions and extracting meaningful data. You can customize the JavaScript code, CSS selectors, and extraction strategies to suit your specific requirements. diff --git a/docs/md _sync/examples/llm_extraction.md b/docs/md _sync/examples/llm_extraction.md deleted file mode 100644 index b780572..0000000 --- a/docs/md _sync/examples/llm_extraction.md +++ /dev/null @@ -1,90 +0,0 @@ -# LLM Extraction - -Crawl4AI allows you to use Language Models (LLMs) to extract structured data or relevant content from web pages. Below are two examples demonstrating how to use LLMExtractionStrategy for different purposes. - -## Example 1: Extract Structured Data - -In this example, we use the `LLMExtractionStrategy` to extract structured data (model names and their fees) from the OpenAI pricing page. - -```python -import os -import time -from crawl4ai.web_crawler import WebCrawler -from crawl4ai.chunking_strategy import * -from crawl4ai.extraction_strategy import * -from crawl4ai.crawler_strategy import * - -url = r'https://openai.com/api/pricing/' - -crawler = WebCrawler() -crawler.warmup() - -from pydantic import BaseModel, Field - -class OpenAIModelFee(BaseModel): - model_name: str = Field(..., description="Name of the OpenAI model.") - input_fee: str = Field(..., description="Fee for input token for the OpenAI model.") - output_fee: str = Field(..., description="Fee for output token for the OpenAI model.") - -result = crawler.run( - url=url, - word_count_threshold=1, - extraction_strategy= LLMExtractionStrategy( - provider= "openai/gpt-4o", api_token = os.getenv('OPENAI_API_KEY'), - schema=OpenAIModelFee.model_json_schema(), - extraction_type="schema", - instruction="From the crawled content, extract all mentioned model names along with their "\ - "fees for input and output tokens. Make sure not to miss anything in the entire content. "\ - 'One extracted model JSON format should look like this: '\ - '{ "model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", "output_fee": "US$30.00 / 1M tokens" }' - ), - bypass_cache=True, -) - -model_fees = json.loads(result.extracted_content) - -print(len(model_fees)) - -with open(".data/data.json", "w", encoding="utf-8") as f: - f.write(result.extracted_content) -``` - -## Example 2: Extract Relevant Content - -In this example, we instruct the LLM to extract only content related to technology from the NBC News business page. - -```python -crawler = WebCrawler() -crawler.warmup() - -result = crawler.run( - url="https://www.nbcnews.com/business", - extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv('OPENAI_API_KEY'), - instruction="Extract only content related to technology" - ), - bypass_cache=True, - ) - -model_fees = json.loads(result.extracted_content) - -print(len(model_fees)) - -with open(".data/data.json", "w", encoding="utf-8") as f: - f.write(result.extracted_content) -``` - -## Customizing LLM Provider - -Under the hood, Crawl4AI uses the `litellm` library, which allows you to use any LLM provider you want. Just pass the correct model name and API token. 
- -```python -extraction_strategy=LLMExtractionStrategy( - provider="your_llm_provider/model_name", - api_token="your_api_token", - instruction="Your extraction instruction" -) -``` - -This flexibility allows you to integrate with various LLM providers and tailor the extraction process to your specific needs. diff --git a/docs/md _sync/examples/research_assistant.md b/docs/md _sync/examples/research_assistant.md deleted file mode 100644 index 9284fbe..0000000 --- a/docs/md _sync/examples/research_assistant.md +++ /dev/null @@ -1,248 +0,0 @@ -## Research Assistant Example - -This example demonstrates how to build a research assistant using `Chainlit` and `Crawl4AI`. The assistant will be capable of crawling web pages for information and answering questions based on the crawled content. Additionally, it integrates speech-to-text functionality for audio inputs. - -### Step-by-Step Guide - -1. **Install Required Packages** - - Ensure you have the necessary packages installed. You need `chainlit`, `groq`, `requests`, and `openai`. - - ```bash - pip install chainlit groq requests openai - ``` - -2. **Import Libraries** - - Import all the necessary modules and initialize the OpenAI client. - - ```python - import os - import time - from openai import AsyncOpenAI - import chainlit as cl - import re - import requests - from io import BytesIO - from chainlit.element import ElementBased - from groq import Groq - - from concurrent.futures import ThreadPoolExecutor - - client = AsyncOpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY")) - - # Instrument the OpenAI client - cl.instrument_openai() - ``` - -3. **Set Configuration** - - Define the model settings for the assistant. - - ```python - settings = { - "model": "llama3-8b-8192", - "temperature": 0.5, - "max_tokens": 500, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - } - ``` - -4. **Define Utility Functions** - - - **Extract URLs from Text**: Use regex to find URLs in messages. - - ```python - def extract_urls(text): - url_pattern = re.compile(r'(https?://\S+)') - return url_pattern.findall(text) - ``` - - - **Crawl URL**: Send a request to `Crawl4AI` to fetch the content of a URL. - - ```python - def crawl_url(url): - data = { - "urls": [url], - "include_raw_html": True, - "word_count_threshold": 10, - "extraction_strategy": "NoExtractionStrategy", - "chunking_strategy": "RegexChunking" - } - response = requests.post("https://crawl4ai.com/crawl", json=data) - response_data = response.json() - response_data = response_data['results'][0] - return response_data['markdown'] - ``` - -5. **Initialize Chat Start Event** - - Set up the initial chat message and user session. - - ```python - @cl.on_chat_start - async def on_chat_start(): - cl.user_session.set("session", { - "history": [], - "context": {} - }) - await cl.Message( - content="Welcome to the chat! How can I assist you today?" - ).send() - ``` - -6. **Handle Incoming Messages** - - Process user messages, extract URLs, and crawl them concurrently. Update the chat history and system message. 
- - ```python - @cl.on_message - async def on_message(message: cl.Message): - user_session = cl.user_session.get("session") - - # Extract URLs from the user's message - urls = extract_urls(message.content) - - futures = [] - with ThreadPoolExecutor() as executor: - for url in urls: - futures.append(executor.submit(crawl_url, url)) - - results = [future.result() for future in futures] - - for url, result in zip(urls, results): - ref_number = f"REF_{len(user_session['context']) + 1}" - user_session["context"][ref_number] = { - "url": url, - "content": result - } - - user_session["history"].append({ - "role": "user", - "content": message.content - }) - - # Create a system message that includes the context - context_messages = [ - f'\n{data["content"]}\n' - for ref, data in user_session["context"].items() - ] - if context_messages: - system_message = { - "role": "system", - "content": ( - "You are a helpful bot. Use the following context for answering questions. " - "Refer to the sources using the REF number in square brackets, e.g., [1], only if the source is given in the appendices below.\n\n" - "If the question requires any information from the provided appendices or context, refer to the sources. " - "If not, there is no need to add a references section. " - "At the end of your response, provide a reference section listing the URLs and their REF numbers only if sources from the appendices were used.\n\n" - "\n\n".join(context_messages) - ) - } - else: - system_message = { - "role": "system", - "content": "You are a helpful assistant." - } - - msg = cl.Message(content="") - await msg.send() - - # Get response from the LLM - stream = await client.chat.completions.create( - messages=[ - system_message, - *user_session["history"] - ], - stream=True, - **settings - ) - - assistant_response = "" - async for part in stream: - if token := part.choices[0].delta.content: - assistant_response += token - await msg.stream_token(token) - - # Add assistant message to the history - user_session["history"].append({ - "role": "assistant", - "content": assistant_response - }) - await msg.update() - - # Append the reference section to the assistant's response - reference_section = "\n\nReferences:\n" - for ref, data in user_session["context"].items(): - reference_section += f"[{ref.split('_')[1]}]: {data['url']}\n" - - msg.content += reference_section - await msg.update() - ``` - -7. **Handle Audio Input** - - Capture and transcribe audio input. Store the audio buffer and transcribe it when the audio ends. 
- - ```python - @cl.on_audio_chunk - async def on_audio_chunk(chunk: cl.AudioChunk): - if chunk.isStart: - buffer = BytesIO() - buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}" - cl.user_session.set("audio_buffer", buffer) - cl.user_session.set("audio_mime_type", chunk.mimeType) - - cl.user_session.get("audio_buffer").write(chunk.data) - - @cl.step(type="tool") - async def speech_to_text(audio_file): - cli = Groq() - response = await client.audio.transcriptions.create( - model="whisper-large-v3", file=audio_file - ) - return response.text - - @cl.on_audio_end - async def on_audio_end(elements: list[ElementBased]): - audio_buffer: BytesIO = cl.user_session.get("audio_buffer") - audio_buffer.seek(0) - audio_file = audio_buffer.read() - audio_mime_type: str = cl.user_session.get("audio_mime_type") - - start_time = time.time() - transcription = await speech_to_text((audio_buffer.name, audio_file, audio_mime_type)) - end_time = time.time() - print(f"Transcription took {end_time - start_time} seconds") - - user_msg = cl.Message( - author="You", - type="user_message", - content=transcription - ) - await user_msg.send() - await on_message(user_msg) - ``` - -8. **Run the Chat Application** - - Start the Chainlit application. - - ```python - if __name__ == "__main__": - from chainlit.cli import run_chainlit - run_chainlit(__file__) - ``` - -### Explanation - -- **Libraries and Configuration**: Import necessary libraries and configure the OpenAI client. -- **Utility Functions**: Define functions to extract URLs and crawl them. -- **Chat Start Event**: Initialize chat session and welcome message. -- **Message Handling**: Extract URLs, crawl them concurrently, and update chat history and context. -- **Audio Handling**: Capture, buffer, and transcribe audio input, then process the transcription as text. -- **Running the Application**: Start the Chainlit server to interact with the assistant. - -This example showcases how to create an interactive research assistant that can fetch, process, and summarize web content, along with handling audio inputs for a seamless user experience. diff --git a/docs/md _sync/examples/summarization.md b/docs/md _sync/examples/summarization.md deleted file mode 100644 index b817f69..0000000 --- a/docs/md _sync/examples/summarization.md +++ /dev/null @@ -1,108 +0,0 @@ -## Summarization Example - -This example demonstrates how to use `Crawl4AI` to extract a summary from a web page. The goal is to obtain the title, a detailed summary, a brief summary, and a list of keywords from the given page. - -### Step-by-Step Guide - -1. **Import Necessary Modules** - - First, import the necessary modules and classes. - - ```python - import os - import time - import json - from crawl4ai.web_crawler import WebCrawler - from crawl4ai.chunking_strategy import * - from crawl4ai.extraction_strategy import * - from crawl4ai.crawler_strategy import * - from pydantic import BaseModel, Field - ``` - -2. **Define the URL to be Crawled** - - Set the URL of the web page you want to summarize. - - ```python - url = r'https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot' - ``` - -3. **Initialize the WebCrawler** - - Create an instance of the `WebCrawler` and call the `warmup` method. - - ```python - crawler = WebCrawler() - crawler.warmup() - ``` - -4. **Define the Data Model** - - Use Pydantic to define the structure of the extracted data. 
- - ```python - class PageSummary(BaseModel): - title: str = Field(..., description="Title of the page.") - summary: str = Field(..., description="Summary of the page.") - brief_summary: str = Field(..., description="Brief summary of the page.") - keywords: list = Field(..., description="Keywords assigned to the page.") - ``` - -5. **Run the Crawler** - - Set up and run the crawler with the `LLMExtractionStrategy`. Provide the necessary parameters, including the schema for the extracted data and the instruction for the LLM. - - ```python - result = crawler.run( - url=url, - word_count_threshold=1, - extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv('OPENAI_API_KEY'), - schema=PageSummary.model_json_schema(), - extraction_type="schema", - apply_chunking=False, - instruction=( - "From the crawled content, extract the following details: " - "1. Title of the page " - "2. Summary of the page, which is a detailed summary " - "3. Brief summary of the page, which is a paragraph text " - "4. Keywords assigned to the page, which is a list of keywords. " - 'The extracted JSON format should look like this: ' - '{ "title": "Page Title", "summary": "Detailed summary of the page.", ' - '"brief_summary": "Brief summary in a paragraph.", "keywords": ["keyword1", "keyword2", "keyword3"] }' - ) - ), - bypass_cache=True, - ) - ``` - -6. **Process the Extracted Data** - - Load the extracted content into a JSON object and print it. - - ```python - page_summary = json.loads(result.extracted_content) - print(page_summary) - ``` - -7. **Save the Extracted Data** - - Save the extracted data to a file for further use. - - ```python - with open(".data/page_summary.json", "w", encoding="utf-8") as f: - f.write(result.extracted_content) - ``` - -### Explanation - -- **Importing Modules**: Import the necessary modules, including `WebCrawler` and `LLMExtractionStrategy` from `Crawl4AI`. -- **URL Definition**: Set the URL of the web page you want to crawl and summarize. -- **WebCrawler Initialization**: Create an instance of `WebCrawler` and call the `warmup` method to prepare the crawler. -- **Data Model Definition**: Define the structure of the data you want to extract using Pydantic's `BaseModel`. -- **Crawler Execution**: Run the crawler with the `LLMExtractionStrategy`, providing the schema and detailed instructions for the extraction process. -- **Data Processing**: Load the extracted content into a JSON object and print it to verify the results. -- **Data Saving**: Save the extracted data to a file for further use. - -This example demonstrates how to harness the power of `Crawl4AI` to perform advanced web crawling and data extraction tasks with minimal code. diff --git a/docs/md _sync/full_details/advanced_features.md b/docs/md _sync/full_details/advanced_features.md deleted file mode 100644 index 944f6e0..0000000 --- a/docs/md _sync/full_details/advanced_features.md +++ /dev/null @@ -1,138 +0,0 @@ -# Advanced Features - -Crawl4AI offers a range of advanced features that allow you to fine-tune your web crawling and data extraction process. This section will cover some of these advanced features, including taking screenshots, extracting media and links, customizing the user agent, using custom hooks, and leveraging CSS selectors. - -## Taking Screenshots 📸 - -One of the cool features of Crawl4AI is the ability to take screenshots of the web pages you're crawling. This can be particularly useful for visual verification or for capturing the state of dynamic content. 
- -Here's how you can take a screenshot: - -```python -from crawl4ai import WebCrawler -import base64 - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with the screenshot parameter -result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True) - -# Save the screenshot to a file -with open("screenshot.png", "wb") as f: - f.write(base64.b64decode(result.screenshot)) - -print("Screenshot saved to 'screenshot.png'!") -``` - -In this example, we create a `WebCrawler` instance, warm it up, and then run it with the `screenshot` parameter set to `True`. The screenshot is saved as a base64 encoded string in the result, which we then decode and save as a PNG file. - -## Extracting Media and Links 🎨🔗 - -Crawl4AI can extract all media tags (images, audio, and video) and links (both internal and external) from a web page. This feature is useful for collecting multimedia content or analyzing link structures. - -Here's an example: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler -result = crawler.run(url="https://www.nbcnews.com/business") - -print("Extracted media:", result.media) -print("Extracted links:", result.links) -``` - -In this example, the `result` object contains dictionaries for media and links, which you can access and use as needed. - -## Customizing the User Agent 🕵️‍♂️ - -Crawl4AI allows you to set a custom user agent for your HTTP requests. This can help you avoid detection by web servers or simulate different browsing environments. - -Here's how to set a custom user agent: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with a custom user agent -result = crawler.run(url="https://www.nbcnews.com/business", user_agent="Mozilla/5.0 (compatible; MyCrawler/1.0)") - -print("Crawl result:", result) -``` - -In this example, we specify a custom user agent string when running the crawler. - -## Using Custom Hooks 🪝 - -Hooks are a powerful feature in Crawl4AI that allow you to customize the crawling process at various stages. You can define hooks for actions such as driver initialization, before and after URL fetching, and before returning the HTML. - -Here's an example of using hooks: - -```python -from crawl4ai import WebCrawler -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC - -# Define the hooks -def on_driver_created(driver): - driver.maximize_window() - driver.get('https://example.com/login') - WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'username'))).send_keys('testuser') - driver.find_element(By.NAME, 'password').send_keys('password123') - driver.find_element(By.NAME, 'login').click() - return driver - -def before_get_url(driver): - driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': {'X-Test-Header': 'test'}}) - return driver - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Set the hooks -crawler.set_hook('on_driver_created', on_driver_created) -crawler.set_hook('before_get_url', before_get_url) - -# Run the crawler -result = crawler.run(url="https://example.com") - -print("Crawl result:", result) -``` - -In this example, we define hooks to handle driver initialization and custom headers before fetching the URL. 
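Hooks can also run at the post-fetch stage. Below is a minimal sketch of a `before_return_html` hook, using the `(driver, html)` signature shown in the Hooks & Auth example; the login-form marker it checks for is an illustrative assumption, not part of the library:

```python
def before_return_html(driver, html):
    # Log how much HTML came back; handy when debugging empty or truncated pages.
    print(f"[HOOK] before_return_html: {len(html)} characters from {driver.current_url}")

    # Illustrative safety check: warn if the fetched page still looks like a login form,
    # which usually means the authentication steps above did not succeed.
    if 'name="password"' in html:  # assumed marker; adjust for the target site
        print("[HOOK] Warning: page appears to contain a login form")
    return driver

crawler.set_hook('before_return_html', before_return_html)
```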
- -## Using CSS Selectors 🎯 - -CSS selectors allow you to target specific elements on a web page for extraction. This can be useful for scraping structured content, such as articles or product details. - -Here's an example of using a CSS selector: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with a CSS selector to extract only H2 tags -result = crawler.run(url="https://www.nbcnews.com/business", css_selector="h2") - -print("Extracted H2 tags:", result.extracted_content) -``` - -In this example, we use the `css_selector` parameter to extract only the H2 tags from the web page. - ---- - -With these advanced features, you can leverage Crawl4AI to perform sophisticated web crawling and data extraction tasks. Whether you need to take screenshots, extract specific elements, customize the crawling process, or set custom headers, Crawl4AI provides the flexibility and power to meet your needs. Happy crawling! 🕷️🚀 diff --git a/docs/md _sync/full_details/crawl_request_parameters.md b/docs/md _sync/full_details/crawl_request_parameters.md deleted file mode 100644 index c32743c..0000000 --- a/docs/md _sync/full_details/crawl_request_parameters.md +++ /dev/null @@ -1,130 +0,0 @@ -# Crawl Request Parameters - -The `run` function in Crawl4AI is designed to be highly configurable, allowing you to customize the crawling and extraction process to suit your needs. Below are the parameters you can use with the `run` function, along with their descriptions, possible values, and examples. - -## Parameters - -### url (str) -**Description:** The URL of the webpage to crawl. -**Required:** Yes -**Example:** -```python -url = "https://www.nbcnews.com/business" -``` - -### word_count_threshold (int) -**Description:** The minimum number of words a block must contain to be considered meaningful. The default value is `5`. -**Required:** No -**Default Value:** `5` -**Example:** -```python -word_count_threshold = 10 -``` - -### extraction_strategy (ExtractionStrategy) -**Description:** The strategy to use for extracting content from the HTML. It must be an instance of `ExtractionStrategy`. If not provided, the default is `NoExtractionStrategy`. -**Required:** No -**Default Value:** `NoExtractionStrategy()` -**Example:** -```python -extraction_strategy = CosineStrategy(semantic_filter="finance") -``` - -### chunking_strategy (ChunkingStrategy) -**Description:** The strategy to use for chunking the text before processing. It must be an instance of `ChunkingStrategy`. The default value is `RegexChunking()`. -**Required:** No -**Default Value:** `RegexChunking()` -**Example:** -```python -chunking_strategy = NlpSentenceChunking() -``` - -### bypass_cache (bool) -**Description:** Whether to force a fresh crawl even if the URL has been previously crawled. The default value is `False`. -**Required:** No -**Default Value:** `False` -**Example:** -```python -bypass_cache = True -``` - -### css_selector (str) -**Description:** The CSS selector to target specific parts of the HTML for extraction. If not provided, the entire HTML will be processed. -**Required:** No -**Default Value:** `None` -**Example:** -```python -css_selector = "div.article-content" -``` - -### screenshot (bool) -**Description:** Whether to take screenshots of the page. The default value is `False`. 
-**Required:** No -**Default Value:** `False` -**Example:** -```python -screenshot = True -``` - -### user_agent (str) -**Description:** The user agent to use for the HTTP requests. If not provided, a default user agent will be used. -**Required:** No -**Default Value:** `None` -**Example:** -```python -user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -``` - -### verbose (bool) -**Description:** Whether to enable verbose logging. The default value is `True`. -**Required:** No -**Default Value:** `True` -**Example:** -```python -verbose = True -``` - -### **kwargs -Additional keyword arguments that can be passed to customize the crawling process further. Some notable options include: - -- **only_text (bool):** Whether to extract only text content, excluding HTML tags. Default is `False`. - -**Example:** -```python -result = crawler.run( - url="https://www.nbcnews.com/business", - css_selector="p", - only_text=True -) -``` - -## Example Usage - -Here's an example of how to use the `run` function with various parameters: - -```python -from crawl4ai import WebCrawler -from crawl4ai.extraction_strategy import CosineStrategy -from crawl4ai.chunking_strategy import NlpSentenceChunking - -# Create the WebCrawler instance -crawler = WebCrawler() - -# Run the crawler with custom parameters -result = crawler.run( - url="https://www.nbcnews.com/business", - word_count_threshold=10, - extraction_strategy=CosineStrategy(semantic_filter="finance"), - chunking_strategy=NlpSentenceChunking(), - bypass_cache=True, - css_selector="div.article-content", - screenshot=True, - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", - verbose=True, - only_text=True -) - -print(result) -``` - -This example demonstrates how to configure various parameters to customize the crawling and extraction process using Crawl4AI. diff --git a/docs/md _sync/full_details/crawl_result_class.md b/docs/md _sync/full_details/crawl_result_class.md deleted file mode 100644 index f66e16d..0000000 --- a/docs/md _sync/full_details/crawl_result_class.md +++ /dev/null @@ -1,120 +0,0 @@ -# Crawl Result - -The `CrawlResult` class is the heart of Crawl4AI's output, encapsulating all the data extracted from a crawling session. This class contains various fields that store the results of the web crawling and extraction process. Let's break down each field and see what it holds. 🎉 - -## Class Definition - -```python -class CrawlResult(BaseModel): - url: str - html: str - success: bool - cleaned_html: Optional[str] = None - media: Dict[str, List[Dict]] = {} - links: Dict[str, List[Dict]] = {} - screenshot: Optional[str] = None - markdown: Optional[str] = None - extracted_content: Optional[str] = None - metadata: Optional[dict] = None - error_message: Optional[str] = None -``` - -## Fields Explanation - -### `url: str` -The URL that was crawled. This field simply stores the URL of the web page that was processed. - -### `html: str` -The raw HTML content of the web page. This is the unprocessed HTML source as retrieved by the crawler. - -### `success: bool` -A flag indicating whether the crawling and extraction were successful. If any error occurs during the process, this will be `False`. - -### `cleaned_html: Optional[str]` -The cleaned HTML content of the web page. 
This field holds the HTML after removing unwanted tags like ` diff --git a/docs/md _sync/introduction.md b/docs/md _sync/introduction.md deleted file mode 100644 index 6d1ad56..0000000 --- a/docs/md _sync/introduction.md +++ /dev/null @@ -1,29 +0,0 @@ -# Introduction - -Welcome to the documentation for Crawl4AI v0.2.5! 🕷️🤖 - -Crawl4AI is designed to simplify the process of crawling web pages and extracting useful information for large language models (LLMs) and AI applications. Whether you're using it as a REST API, a Python library, or through a Google Colab notebook, Crawl4AI provides powerful features to make web data extraction easier and more efficient. - -## Key Features ✨ - -- **🆓 Completely Free and Open-Source**: Crawl4AI is free to use and open-source, making it accessible for everyone. -- **🤖 LLM-Friendly Output Formats**: Supports JSON, cleaned HTML, and markdown formats. -- **🌍 Concurrent Crawling**: Crawl multiple URLs simultaneously to save time. -- **🎨 Media Extraction**: Extract all media tags including images, audio, and video. -- **🔗 Link Extraction**: Extract all external and internal links from web pages. -- **📚 Metadata Extraction**: Extract metadata from web pages for additional context. -- **🔄 Custom Hooks**: Define custom hooks for authentication, headers, and page modifications before crawling. -- **🕵️ User Agent Support**: Customize the user agent for HTTP requests. -- **🖼️ Screenshot Capability**: Take screenshots of web pages during crawling. -- **📜 JavaScript Execution**: Execute custom JavaScripts before crawling. -- **📚 Advanced Chunking and Extraction Strategies**: Utilize topic-based, regex, sentence chunking, cosine clustering, and LLM extraction strategies. -- **🎯 CSS Selector Support**: Extract specific content using CSS selectors. -- **📝 Instruction/Keyword Refinement**: Pass instructions or keywords to refine the extraction process. - -Check the [Changelog](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md) for more details. - -## Power and Simplicity of Crawl4AI 🚀 - -Crawl4AI provides an easy way to crawl and extract data from web pages without installing any library. You can use the REST API on our server or run the local server on your machine. For more advanced control, use the Python library to customize your crawling and extraction strategies. - -Explore the documentation to learn more about the features, installation process, usage examples, and how to contribute to Crawl4AI. Let's make the web more accessible and useful for AI applications! 💪🌐🤖 diff --git a/docs/md _sync/quickstart.md b/docs/md _sync/quickstart.md deleted file mode 100644 index a0c1a2c..0000000 --- a/docs/md _sync/quickstart.md +++ /dev/null @@ -1,204 +0,0 @@ -# Quick Start Guide 🚀 - -Welcome to the Crawl4AI Quickstart Guide! In this tutorial, we'll walk you through the basic usage of Crawl4AI with a friendly and humorous tone. We'll cover everything from basic usage to advanced features like chunking and extraction strategies. Let's dive in! 🌟 - -## Getting Started 🛠️ - -First, let's create an instance of `WebCrawler` and call the `warmup()` function. This might take a few seconds the first time you run Crawl4AI, as it loads the required model files. - -```python -from crawl4ai import WebCrawler - -def create_crawler(): - crawler = WebCrawler(verbose=True) - crawler.warmup() - return crawler - -crawler = create_crawler() -``` - -### Basic Usage - -Simply provide a URL and let Crawl4AI do the magic! 
- -```python -result = crawler.run(url="https://www.nbcnews.com/business") -print(f"Basic crawl result: {result}") -``` - -### Taking Screenshots 📸 - -Let's take a screenshot of the page! - -```python -result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True) -with open("screenshot.png", "wb") as f: - f.write(base64.b64decode(result.screenshot)) -print("Screenshot saved to 'screenshot.png'!") -``` - -### Understanding Parameters 🧠 - -By default, Crawl4AI caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action. - -First crawl (caches the result): -```python -result = crawler.run(url="https://www.nbcnews.com/business") -print(f"First crawl result: {result}") -``` - -Force to crawl again: -```python -result = crawler.run(url="https://www.nbcnews.com/business", bypass_cache=True) -print(f"Second crawl result: {result}") -``` - -### Adding a Chunking Strategy 🧩 - -Let's add a chunking strategy: `RegexChunking`! This strategy splits the text based on a given regex pattern. - -```python -from crawl4ai.chunking_strategy import RegexChunking - -result = crawler.run( - url="https://www.nbcnews.com/business", - chunking_strategy=RegexChunking(patterns=["\n\n"]) -) -print(f"RegexChunking result: {result}") -``` - -You can also use `NlpSentenceChunking` which splits the text into sentences using NLP techniques. - -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -result = crawler.run( - url="https://www.nbcnews.com/business", - chunking_strategy=NlpSentenceChunking() -) -print(f"NlpSentenceChunking result: {result}") -``` - -### Adding an Extraction Strategy 🧠 - -Let's get smarter with an extraction strategy: `CosineStrategy`! This strategy uses cosine similarity to extract semantically similar blocks of text. - -```python -from crawl4ai.extraction_strategy import CosineStrategy - -result = crawler.run( - url="https://www.nbcnews.com/business", - extraction_strategy=CosineStrategy( - word_count_threshold=10, - max_dist=0.2, - linkage_method="ward", - top_k=3 - ) -) -print(f"CosineStrategy result: {result}") -``` - -You can also pass other parameters like `semantic_filter` to extract specific content. - -```python -result = crawler.run( - url="https://www.nbcnews.com/business", - extraction_strategy=CosineStrategy( - semantic_filter="inflation rent prices" - ) -) -print(f"CosineStrategy result with semantic filter: {result}") -``` - -### Using LLMExtractionStrategy 🤖 - -Time to bring in the big guns: `LLMExtractionStrategy` without instructions! This strategy uses a large language model to extract relevant information from the web page. - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy -import os - -result = crawler.run( - url="https://www.nbcnews.com/business", - extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv('OPENAI_API_KEY') - ) -) -print(f"LLMExtractionStrategy (no instructions) result: {result}") -``` - -You can also provide specific instructions to guide the extraction. - -```python -result = crawler.run( - url="https://www.nbcnews.com/business", - extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv('OPENAI_API_KEY'), - instruction="I am interested in only financial news" - ) -) -print(f"LLMExtractionStrategy (with instructions) result: {result}") -``` - -### Targeted Extraction 🎯 - -Let's use a CSS selector to extract only H2 tags! 
- -```python -result = crawler.run( - url="https://www.nbcnews.com/business", - css_selector="h2" -) -print(f"CSS Selector (H2 tags) result: {result}") -``` - -### Interactive Extraction 🖱️ - -Passing JavaScript code to click the 'Load More' button! - -```python -js_code = """ -const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); -loadMoreButton && loadMoreButton.click(); -""" - -result = crawler.run( - url="https://www.nbcnews.com/business", - js=js_code -) -print(f"JavaScript Code (Load More button) result: {result}") -``` - -### Using Crawler Hooks 🔗 - -Let's see how we can customize the crawler using hooks! - -```python -import time - -from crawl4ai.web_crawler import WebCrawler -from crawl4ai.crawler_strategy import * - -def delay(driver): - print("Delaying for 5 seconds...") - time.sleep(5) - print("Resuming...") - -def create_crawler(): - crawler_strategy = LocalSeleniumCrawlerStrategy(verbose=True) - crawler_strategy.set_hook('after_get_url', delay) - crawler = WebCrawler(verbose=True, crawler_strategy=crawler_strategy) - crawler.warmup() - return crawler - -crawler = create_crawler() -result = crawler.run(url="https://www.nbcnews.com/business", bypass_cache=True) -``` - -check [Hooks](examples/hooks_auth.md) for more examples. - -## Congratulations! 🎉 - -You've made it through the Crawl4AI Quickstart Guide! Now go forth and crawl the web like a pro! 🕸️ diff --git a/docs/md/api/core_classes_and_functions.md b/docs/md/api/core_classes_and_functions.md deleted file mode 100644 index 198fd42..0000000 --- a/docs/md/api/core_classes_and_functions.md +++ /dev/null @@ -1,141 +0,0 @@ -# Core Classes and Functions - -## Overview - -In this section, we will delve into the core classes and functions that make up the Crawl4AI library. This includes the `WebCrawler` class, various `CrawlerStrategy` classes, `ChunkingStrategy` classes, and `ExtractionStrategy` classes. Understanding these core components will help you leverage the full power of Crawl4AI for your web crawling and data extraction needs. - -## WebCrawler Class - -The `WebCrawler` class is the main class you'll interact with. It provides the interface for crawling web pages and extracting data. - -### Initialization - -```python -from crawl4ai import WebCrawler - -# Create an instance of WebCrawler -crawler = WebCrawler() -``` - -### Methods - -- **`warmup()`**: Prepares the crawler for use, such as loading necessary models. -- **`run(url: str, **kwargs)`**: Runs the crawler on the specified URL with optional parameters for customization. - -```python -crawler.warmup() -result = crawler.run(url="https://www.nbcnews.com/business") -print(result) -``` - -## CrawlerStrategy Classes - -The `CrawlerStrategy` classes define how the web crawling is executed. The base class is `CrawlerStrategy`, which is extended by specific implementations like `LocalSeleniumCrawlerStrategy`. - -### CrawlerStrategy Base Class - -An abstract base class that defines the interface for different crawler strategies. 
- -```python -from abc import ABC, abstractmethod - -class CrawlerStrategy(ABC): - @abstractmethod - def crawl(self, url: str, **kwargs) -> str: - pass - - @abstractmethod - def take_screenshot(self, save_path: str): - pass - - @abstractmethod - def update_user_agent(self, user_agent: str): - pass - - @abstractmethod - def set_hook(self, hook_type: str, hook: Callable): - pass -``` - -### LocalSeleniumCrawlerStrategy Class - -A concrete implementation of `CrawlerStrategy` that uses Selenium to crawl web pages. - -#### Initialization - -```python -from crawl4ai.crawler_strategy import LocalSeleniumCrawlerStrategy - -strategy = LocalSeleniumCrawlerStrategy(js_code=["console.log('Hello, world!');"]) -``` - -#### Methods - -- **`crawl(url: str, **kwargs)`**: Crawls the specified URL. -- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -```python -result = strategy.crawl("https://www.example.com") -strategy.take_screenshot("screenshot.png") -strategy.update_user_agent("Mozilla/5.0") -strategy.set_hook("before_get_url", lambda: print("About to get URL")) -``` - -## ChunkingStrategy Classes - -The `ChunkingStrategy` classes define how the text from a web page is divided into chunks. Here are a few examples: - -### RegexChunking Class - -Splits text using regular expressions. - -```python -from crawl4ai.chunking_strategy import RegexChunking - -chunker = RegexChunking(patterns=[r'\n\n']) -chunks = chunker.chunk("This is a sample text. It will be split into chunks.") -``` - -### NlpSentenceChunking Class - -Uses NLP to split text into sentences. - -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -chunker = NlpSentenceChunking() -chunks = chunker.chunk("This is a sample text. It will be split into sentences.") -``` - -## ExtractionStrategy Classes - -The `ExtractionStrategy` classes define how meaningful content is extracted from the chunks. Here are a few examples: - -### CosineStrategy Class - -Clusters text chunks based on cosine similarity. - -```python -from crawl4ai.extraction_strategy import CosineStrategy - -extractor = CosineStrategy(semantic_filter="finance", word_count_threshold=10) -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### LLMExtractionStrategy Class - -Uses a Language Model to extract meaningful blocks from HTML. - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy - -extractor = LLMExtractionStrategy(provider='openai', api_token='your_api_token', instruction='Extract only news about AI.') -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -## Conclusion - -By understanding these core classes and functions, you can customize and extend Crawl4AI to suit your specific web crawling and data extraction needs. Happy crawling! 🕷️🤖 - diff --git a/docs/md/api/detailed_api_documentation.md b/docs/md/api/detailed_api_documentation.md deleted file mode 100644 index aaf0e34..0000000 --- a/docs/md/api/detailed_api_documentation.md +++ /dev/null @@ -1,338 +0,0 @@ -# Detailed API Documentation - -## Overview - -This section provides comprehensive documentation for the Crawl4AI API, covering all classes, methods, and their parameters. This guide will help you understand how to utilize the API to its full potential, enabling efficient web crawling and data extraction. 
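Before diving into the class-by-class reference, here is a minimal end-to-end sketch that combines the pieces documented below; the URL and the `semantic_filter` keyword are placeholders you would replace with your own:

```python
from crawl4ai import WebCrawler
from crawl4ai.chunking_strategy import RegexChunking
from crawl4ai.extraction_strategy import CosineStrategy

# Prepare the crawler (loads the required models on first use).
crawler = WebCrawler()
crawler.warmup()

# Crawl a page, split the text on blank lines, and keep clusters related to "finance".
result = crawler.run(
    url="https://www.nbcnews.com/business",
    chunking_strategy=RegexChunking(patterns=[r"\n\n"]),
    extraction_strategy=CosineStrategy(semantic_filter="finance"),
)

print(result.extracted_content)
```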
- -## WebCrawler Class - -The `WebCrawler` class is the primary interface for crawling web pages and extracting data. - -### Initialization - -```python -from crawl4ai import WebCrawler - -crawler = WebCrawler() -``` - -### Methods - -#### `warmup()` - -Prepares the crawler for use, such as loading necessary models. - -```python -crawler.warmup() -``` - -#### `run(url: str, **kwargs) -> CrawlResult` - -Crawls the specified URL and returns the result. - -- **Parameters:** - - `url` (str): The URL to crawl. - - `**kwargs`: Additional parameters for customization. - -- **Returns:** - - `CrawlResult`: An object containing the crawl result. - -- **Example:** - -```python -result = crawler.run(url="https://www.nbcnews.com/business") -print(result) -``` - -### CrawlResult Class - -Represents the result of a crawl operation. - -- **Attributes:** - - `url` (str): The URL of the crawled page. - - `html` (str): The raw HTML of the page. - - `success` (bool): Whether the crawl was successful. - - `cleaned_html` (Optional[str]): The cleaned HTML. - - `media` (Dict[str, List[Dict]]): Media tags in the page (images, audio, video). - - `links` (Dict[str, List[Dict]]): Links in the page (external, internal). - - `screenshot` (Optional[str]): Base64 encoded screenshot. - - `markdown` (Optional[str]): Extracted content in Markdown format. - - `extracted_content` (Optional[str]): Extracted meaningful content. - - `metadata` (Optional[dict]): Metadata from the page. - - `error_message` (Optional[str]): Error message if any. - -## CrawlerStrategy Classes - -The `CrawlerStrategy` classes define how the web crawling is executed. - -### CrawlerStrategy Base Class - -An abstract base class for different crawler strategies. - -#### Methods - -- **`crawl(url: str, **kwargs) -> str`**: Crawls the specified URL. -- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -### LocalSeleniumCrawlerStrategy Class - -Uses Selenium to crawl web pages. - -#### Initialization - -```python -from crawl4ai.crawler_strategy import LocalSeleniumCrawlerStrategy - -strategy = LocalSeleniumCrawlerStrategy(js_code=["console.log('Hello, world!');"]) -``` - -#### Methods - -- **`crawl(url: str, **kwargs)`**: Crawls the specified URL. -- **`take_screenshot(save_path: str)`**: Takes a screenshot of the current page. -- **`update_user_agent(user_agent: str)`**: Updates the user agent for the browser. -- **`set_hook(hook_type: str, hook: Callable)`**: Sets a hook for various events. - -#### Example - -```python -result = strategy.crawl("https://www.example.com") -strategy.take_screenshot("screenshot.png") -strategy.update_user_agent("Mozilla/5.0") -strategy.set_hook("before_get_url", lambda: print("About to get URL")) -``` - -## ChunkingStrategy Classes - -The `ChunkingStrategy` classes define how the text from a web page is divided into chunks. - -### RegexChunking Class - -Splits text using regular expressions. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import RegexChunking - -chunker = RegexChunking(patterns=[r'\n\n']) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into chunks. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into chunks.") -``` - -### NlpSentenceChunking Class - -Uses NLP to split text into sentences. 
- -#### Initialization - -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -chunker = NlpSentenceChunking() -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into sentences. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into sentences.") -``` - -### TopicSegmentationChunking Class - -Uses the TextTiling algorithm to segment text into topics. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import TopicSegmentationChunking - -chunker = TopicSegmentationChunking(num_keywords=3) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into topic-based segments. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into topic-based segments.") -``` - -### FixedLengthWordChunking Class - -Splits text into chunks of fixed length based on the number of words. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import FixedLengthWordChunking - -chunker = FixedLengthWordChunking(chunk_size=100) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text into fixed-length word chunks. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split into fixed-length word chunks.") -``` - -### SlidingWindowChunking Class - -Uses a sliding window approach to chunk text. - -#### Initialization - -```python -from crawl4ai.chunking_strategy import SlidingWindowChunking - -chunker = SlidingWindowChunking(window_size=100, step=50) -``` - -#### Methods - -- **`chunk(text: str) -> List[str]`**: Splits the text using a sliding window approach. - -#### Example - -```python -chunks = chunker.chunk("This is a sample text. It will be split using a sliding window approach.") -``` - -## ExtractionStrategy Classes - -The `ExtractionStrategy` classes define how meaningful content is extracted from the chunks. - -### NoExtractionStrategy Class - -Returns the entire HTML content without any modification. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import NoExtractionStrategy - -extractor = NoExtractionStrategy() -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Returns the HTML content. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### LLMExtractionStrategy Class - -Uses a Language Model to extract meaningful blocks from HTML. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy - -extractor = LLMExtractionStrategy(provider='openai', api_token='your_api_token', instruction='Extract only news about AI.') -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts meaningful content using the LLM. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### CosineStrategy Class - -Clusters text chunks based on cosine similarity. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import CosineStrategy - -extractor = CosineStrategy(semantic_filter="finance", word_count_threshold=10) -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts clusters of text based on cosine similarity. 
- -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -### TopicExtractionStrategy Class - -Uses the TextTiling algorithm to segment HTML content into topics and extract keywords. - -#### Initialization - -```python -from crawl4ai.extraction_strategy import TopicExtractionStrategy - -extractor = TopicExtractionStrategy(num_keywords=3) -``` - -#### Methods - -- **`extract(url: str, html: str) -> str`**: Extracts topic-based segments and keywords. - -#### Example - -```python -extracted_content = extractor.extract(url="https://www.example.com", html="...") -``` - -## Parameters - -Here are the common parameters used across various classes and methods: - -- **`url`** (str): The URL to crawl. -- **`html`** (str): The HTML content of the page. -- **`user_agent`** (str): The user agent for the HTTP requests. -- **`patterns`** (list): A list of regular expression patterns for chunking. -- **`num_keywords`** (int): Number of keywords for topic extraction. -- **`chunk_size`** (int): Number of words in each chunk. -- **`window_size`** (int): Number of words in the sliding window. -- **`step`** (int): Step size for the sliding window. -- **`semantic_filter`** (str): Keywords for filtering relevant documents. -- **`word_count_threshold`** (int): Minimum number of words per cluster. -- **`max_dist`** (float): Maximum cophenetic distance for clustering. -- **`linkage_method`** (str): Linkage method for hierarchical clustering. -- **`top_k`** (int): Number of top categories to extract. -- **`provider`** ( - -str): Provider for language model completions. -- **`api_token`** (str): API token for the provider. -- **`instruction`** (str): Instruction to guide the LLM extraction. - -## Conclusion - -This detailed API documentation provides a thorough understanding of the classes, methods, and parameters in the Crawl4AI library. With this knowledge, you can effectively use the API to perform advanced web crawling and data extraction tasks. \ No newline at end of file diff --git a/docs/md/assets/DankMono-Bold.woff2 b/docs/md/assets/DankMono-Bold.woff2 deleted file mode 100644 index 3072fd8..0000000 Binary files a/docs/md/assets/DankMono-Bold.woff2 and /dev/null differ diff --git a/docs/md/assets/DankMono-Italic.woff2 b/docs/md/assets/DankMono-Italic.woff2 deleted file mode 100644 index 1d01ea6..0000000 Binary files a/docs/md/assets/DankMono-Italic.woff2 and /dev/null differ diff --git a/docs/md/assets/DankMono-Regular.woff2 b/docs/md/assets/DankMono-Regular.woff2 deleted file mode 100644 index 99c1425..0000000 Binary files a/docs/md/assets/DankMono-Regular.woff2 and /dev/null differ diff --git a/docs/md/assets/Monaco.woff b/docs/md/assets/Monaco.woff deleted file mode 100644 index e468c42..0000000 Binary files a/docs/md/assets/Monaco.woff and /dev/null differ diff --git a/docs/md/assets/dmvendor.css b/docs/md/assets/dmvendor.css deleted file mode 100644 index 0f72703..0000000 --- a/docs/md/assets/dmvendor.css +++ /dev/null @@ -1,127 +0,0 @@ -/*! - * @preserve - * Dank Mono (v1.000) - * This font is subject to its EULA. https://dank.sh - * © 2018–2020 Phil Plückthun. All Rights Reserved. 
- */ - -@font-face{ - font-family: "dm"; - font-weight: normal; - font-style: normal; - unicode-range: U+0000-007F; - src: local('☺'), - url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAB+IAAwAAAAALhwAAB86AAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADc1uGigbIByEAAZgAEQBNgIkA4N8BAYFgXQHIBtJLVGUcl4UgJ8HmZuXZtEW1XZl1rY1cWX7wvl7NY0jDnGEqlmVEZLM+g+Xe+//m9NaFMHk+OwEWgs4kDD0hNcYoLl1G7sFvSC2G3ejJxZRSrYORERq0KMGrIBXQjBIi6oBI5QqC7ByVsTBz+Ahf/fuPoLQbUIXbIhSU7UmVOsCIQpFLKyeN+d/7VvOOXrpvv9031nbu/K9CoRoiySCCEkIssgKDiJEJ5yEg8A4JBb8jSMYPogQokhBgbQIkOAKcVF6IMtfyIQ+d+cu/v+v/au9+3MAUeHyCCrGRZiZMzN577y77uPAywswTSaA3OomsjiqIQJVRNUqEqaqvqveEAorKjzrNFiBKWCmykph97vHcK4frYg5UKbvwyBcApjAkAdx/PSgLH7W8ncvIoOz9HysxsIcAAuLLDjpgq6hIAdK8AkoHaz+BSs1z6FKKHu36QGAe4OBjc9cfBmfqx7I7kVZJDIME8Ag0mDAhr8IGYrt1mvaRTzCZ+IxdFrRlesp5FbWsodzvMjH/Cx4LCTW4iwBUiEtIpvwUzMKc9/LM04oSiKaVlxhxnbjrchITYmLjxNw41PaiKIdvIL0JNHOIlwTRvYoRZjbNW6WNTvc9ksSJFdlg03MxCA8yR1sg+NgD8eEvwqhRVwmmh1otz0PL/9nINpnC+xkSsRGcbADHLAs7+xueTAcrEx4M8nVDKINaFcahkTCPu7ZgBjrzc9djROALdc83jmklZkzBbirzTAwKZu4Y2KGvfNW8JLydJbg3cpihXmagPp7RbETopa0E9PXxaN633U5GQnpv7FBrchc9XG/8K/zIX7xseZklgd4AgDA2EEA0wA8gDtiM6DXYcobQBEnBLF6xQiIHP+i+EeI+AUrlma5pRQNVL626ZBe1FcGx9hiXDXNzRzznGVpca37tq9dZB+3XzgE4hJnu0skebut7ivPyPPz+N5R76cK0+f4RX6L/zLQDCKCuuC7WmR4LnwbMSLHqCO6Ef3WsIqD47S4LO6JD8bXEkfNZs2HZC+yjPySEk2RUe5RmVR/ajr1FI1M20C7Dh6Qz8oseVA2anf1Zfl2+Kqe6fPMtGcadcWzK561n2vz3IrnLj4XZrzu/8Ufhr4L7AnUxTwf83rM1RjSa/H86OeXGH8xuhqTjZ+N/5rPmJPMX836uL/H++JTtZhwpqZpEAYrF4550P9ISSlBKJfiH0NYPIWJpfSPvUcectIq8eREZ+fkhLgzIUEkToA1RzqnJ5cCz8fvHlNnw1PFy9PzQX3njy0DqOnPd6Msoc+OW8wQTXfmY9SCqoGuh1Bd0h+t0Xq2hXB5levz31SEs3LOiY3LMsAzNuzPvVBXvgXVrKqkF9fTNqKhrHs3+JYChwjWqje4d+pfymdRNUvmsPHZiwCBfWdoGLa8t6vkttG3p9d8ThxmFQnQllReh7EfOv3PELLeWPBtVQWEIXHsq2HJzMYTJ56ddmrgTocfow4nZwkifdyzG2H8BJS5QHmGQSfEsMVRsowDROT00cKoqJ5zMN7J3zUj026eQRtUtyzce+DmUphqRI9N0w+c9mvzzsDvOJqvGhSzqwSmoaXTlw1eNeGMSlccaSyYoiROEyPVSIOsKCy+c2fk2aSktFF2SQXDKsxFZ+T4nJ5LF1Z2zx/LE/j7S3I2IHRKxWP/NGi5Nxx2mWev7cVorlbJxV3Xzpjr3ujN2Jd6c4suKUE7b2fHVPsh+riCj7LyuLnJdOIgi3W//+G+Q4wLO4bkp/NzuD3w6ROscPLwAecAEXr3j311ScL5TmbrEa+4O/hJGwRSBs2AKezl6Fcj0jdC3btXVJ3ZNn/1EHI47WLv6mZRxOOVWa5syKrOQzYhHjsGaxu8mMBBCbdjKB4w2Eb6cZAcyHwAGVCw/+h4ldigQsaxlj4yWDpU4O63n6ZI6RSpAnJPV6gQTCMemwDCAeZXcU+TChA8ADUgB3B6Sx/peYzw5KavyOE5XbkUzFh6NlaotbIo6/fKJ+zCXQagwercG+cJmsH0PNgIPbvfzgKFBdFOThmJsnozDZ2WB5OWF3R0SqwkcwvmsLTIEubJZE4ruUbW6PKgA2QvMogp898Fd699Ae0TfsY6/bzdm2u3wDmjU7xKC4kaGi2P6sIb6ULJCuJaLsiXhXXk8+SBk1442VOimryq23ko3cinLgVTVFTYla35Y7cu9pyvw7c673kFSSoCjlxgAUuAcEUBComAQ2Se/9cHJGcqMtCFt3L/bkFMP0I1Lp/NOV586YYf6dfBfAAnuH8kdiseWz62zsfRTyH9FToJk2d0DGfBWtCRenDU9kJFyeWdn+Sb9k0S3TvklrLgLb5cWdndyfhfcnYiksSMQZ5xdh9FaUHLoVgelWl7VaZCJZHJjMxInwyMiRMlklxJXi69vaqxXETAC74Iwlo4+KH8wkXSjuv9auIrxrnJ7Eb+yZUXA1X5yV6IlluKkyGMlXqUqMaLYMdE5q8+t/QwVHsqveMLw8crQPPVlBk8lK+MTEickKkAenHXXnnJTJsMdjpUkg0wgOX3LDDuv1w6YDNyeefIecsP3RZ3SKt3hzfSsmEtqFJIWAYTXIrGD1ufbaNMooPr31OZLfTZKAO+jPCWWBXs3tMBJ5IJeXHsUlzWntMudz3TavGABpQ8eDUau7+jpsFL6codq+Y6zwUenR5G6qQ2qq+Bqe8cES47nmmOGQumhK5PvxkAqxH9O6yxss2fivhBWaLEfGQVdjLIQy6Tv+YymZZbjsVrEJZ5qm18rfBWK5EkgSDHP+JhCYuH39ERy1r/Tit6pr4Tx4OPCRjcrofKnkPFm/rhY8Vs7aTAZg5EzqRC2rTGRPtorT9C/VrE9+zRomX/WwDNa+LEtlu7rwe0oCONhlPdM6WOVKYoW0E2FEr2tIkm/HVcP4FpmCEMzsgwLKGp5RZUwPEElTx43wtd9b9MPNrDmmGQxswM4wF+MgEwAHTo198CfDWjUfGvbYa1ffZerR+kbhcIJDBpGKkYe1wrefrI/Sd9BoC3jjiM8TcbNmNlu6E7PxvoUStkysT17plw700Tj13xXzs6fPny4TQ3WMstA++aUir3lFTgq+2FHL2y8vL2yWEkQ5BBtzakOuXluhY+nD4UfGfv+blnAnCFCrSwxoRAwLHVI0UHskhIIDuyrG8DY2OFCpT/X9KDQLO/GDfLNcI50C7l2iP0LzdvNZjabyCjhkVrcHMlSrMJ24YUcs/KFGYrblrZMXWW879Am9QFD0jK1gsSFDJRKAqV7LhLzxGH54iddZ9pW
-[removed @font-face rules for font-family "dm" (normal and italic styles, unicode ranges U+0000-007F through U+F8FF-10FFFF), each with a `src` pointing to a base64-encoded woff2 data URI; the encoded font payloads are omitted here]
TGs+xT3+Ih5BhY4PXTgKO9PJJzadOCl+nPes7J10JBXzb4qLi5iJ65c3oGCAlCO6HNkzfacLgf5l2HDuUsMpQyIWvD32uW/XoM+ub+n9anAfY4uP4Nl9gBXXsw2U9AgqCI8ajh/NxAOT25LNyc0asEo+kHGQHk4FTQwc9w8ekwADD8KjRhjl/9OYOCVW+FiYQryI5lK7fuxadreXlzNQylO4xKrEQ4YSAGA0N5CtXA4VIPO9SdqgF2z94ZQur+spQEznIlnQYpn5MPaCEX07+3+kpnyOV2Bxe1Q7PLojYq5w5faFV0j8i2LMJ6/ueFj9jIY5VFk7iWdTWVsy0UEmvPACPKh7GXXQ45tk34GwtVWyE3vJ02ELtjlL8EQf3B/LYmX/XyBO9IlK02zSbgqnMZoc57VrRvtOnjMubqyDUM/1w33bdQ/h0EMR43Zk8GB3YzA5/ngC+YgyAQuGyMSyRFOM2E6l1ySsXZThoQOLOAmHJoXPVPnaLbOo/hYzuZwSRpV13TdMNGCNgcLrc636tXAdelhmoHGwvlhv3W6zp1t7Dtxg11xb2z0lhcU5uS223bsce/VFiMxnEYEQskF9SBHTZsxbgmgk+EB7Ya5+1AZzxONOGLhOFYwcfCLKKuBtJXIQyQ3wbXlXELRqZKIGuWQZAmvioHxMFD9x/Tz59b8AIZaDsJ6EUH6tzBvDt/WWp+nJ65Dv2Yt5E72iAJVEStlJDyaUJst7ycVyqby/fLDcST5GPk1eIzf1699vcD/P3NLSL2fkDvI+cnDyg6NFq4VWM7XzQ+fbzjedrztfdHZ03uy8iHR6d3zquNexFxFICiw5YO+rDwmxP1w8Zf6RAGJT1tHHeqzpMe4bzReYEULoXsTJ03zl+INHfjv++m59UTAB8RAf8exN8U/+ApTRbj4kftc4SVv9fgAIwStAwvZ6FP7wIzaoQZUYDZokeY4uO/ud/O2I5Bb5F+NXmtvnmGU45GZTgpAVWtVpJwTWCKEI13UNAglQhAYP8VCMBh8hFFdV4C+eRGI3byrM4acKEUpnIz8+fKnINZJz4cTZqMWsOe0TufV0LNKNr3/vwJo9kIVCAJWcqAX7r4fJTj7Kx+odJvFUclR6aroIL/+hpNjAZeSOrrr9hAgm5+ztVBE+Qr+5fqBy5AIAAAA=) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: normal; - font-style: italic; - unicode-range: U+2190-21FF; - src: local('☺'), - url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAATcAAsAAAAABxwAAASRAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADYdvGyAcKgZgADQBNgIkAxgEBgWBdAcgG1gGEVWcQwE+ElO4G3pA8hc0hxm3NGG6/8up/ZE0IXYD5JQJQigjvFYGThf5VABWk9PS4bI3/ONOAHFwCxNLNbXr539uoRZZW+ctNHr4gnzB5NNGgyUgiUYiIVpoeKSemIV0+UJPp2JbqHYhnb3wk1sQkAQghOQtP1J7bkNdbd2Ed62zY/jbl7IRwEZmARslsNEiK5EsWQLPzxKrPgHLtGmDHo/hs7NS+Xn+USVA+usP7yneyn36YVUB3suHHEIy8suHT5i5YPm6rYc++unqkcbGujbHjdmW+gr7CLfIUzhDQQrTuo/XttSemTplzszcNcs1Oz5nh0pMmTh5MRQqbf/oY/vHXP9DXwMOnc+b3Hz+2LnfThD+zwwgPdH0l+igeGDa0qWNYv8HACGOdMgUgIAA05z1MbwnI0g7uUL+FqYIF3LQo5Iv2a/iQ+zRyAbZYCgzxI1xQxXa+fHrj/kaHEPZFrVvewqux7GHn9KcT1rZn20Eb6kijsRPNDgW19O+3SrbkYpj+JrH1/lxtNO4wRg3lBsMsmyo5se5i7plv6viO3ZK0b+ej61sX8G2pFQyH3N9SwqPp/WReDhSFpGXpCqD4FiOzz1xI67ZBFu1PBKOk0k143po33Kdj+FrKNuxom93Kh+L6zcddktLJNPE62GS9LJRTphiemktn1+bjYTi/ymyysn1LXgjzqexaCLksj8sR8mGSRzehAQhUE4EcgaFUOeSv1QH/ccea54exiE4nPZ3pfKMbELUzfwI1WZhz8mhRCwqbcaDKQ0n2pIUZfIYgfhvojG9KWEoNxhlWV+d84KtI5jCNop4FodrmtCIKldv3eZv8C3qmoO9vOdzard57OV6vdtur3Y63VarxWs2lX3Je6kNT/iLbRLZj2y3xhvyBgNlfrffcbFKseu9et3r8p6m4FLUfcbf8Le39qCKahM39hoGxz+VrWyO5vOc8s1kLwlSCdcQ5qWoeyhVfB6lPBr1Kkr1pUtenwM7Q+5gVQP2Us9fKRWpxuw2WcssXttFR5Vdibqj2gTnP2ju5bpmfINv997mKlc/R+PSCM2R48nfE4sSvDWRwo/6UuU4+9ODOxNpvNYT8aSrGdcyVUVRkuuysq5FQxHF92NWdtI2uqgSgAY0CABkQIUqZPnK9dshDSQAgMFBAADACYw0mrBYzvkjzbVQwrRDrST8TBSgcpCYJbwDsHe15AuYOjthYdfuBVNNaip+PYBgECR9cW4PaCDMFbMa7CYVKrtIirIsEFmSQGuXbKRc08HBypbsnw0cCimurtT2sysP5c7/L02UfgEA+K3z8VP5dNPte4OjB/7f19JCEEAEQbNJfDyAvBmbGb7BLxA6PviHt3+eCEHIgTI7TbdTnrMaHXZaI9gMMgEgFfYSgUjpANAJzAoBCToVAQTQKyIAGEclnkKhjJGn6Dx9mTr1OjQ645TTmml9C1pTTTbF7Cs0oXVJ6yhXaG2uIJ+fN131vD7HnNPstBa1JtJa4jxjyFbehNATsv0Eba1Kx01c2g2ZnfCjmpxRp5bWFBNNTqsiwCCr53WbagEAAAA=) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: normal; - font-style: italic; - unicode-range: U+F8FF-10FFFF; - src: local('☺'), - 
url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAAJwAAsAAAAAA/gAAAInAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADYIOGyAcKgZgAAQBNgIkAwQEBgWBdAcgGzMDIL4Mb4gcpY8lI6bRFpuvq4/OWEUk/8G00RY3bzZ45QTP/4/7fp977/hhqDRrEowk18nij7AjlZhdyoMv5jaxZE2Bj3goRCo0jjQxTYTacALb/XAD3u4CDzHDKgCagaf4WPsskuLqgQ3PZuX/n3qBs2kmff9/N/kG2b0PAne7CZpHlGAY9xJq09hCASUSoGTuSs0f2zl4eIgSJghGISKxbXaRXsuiPBy17rvw/Wa3uNuCe5R7qd2i2WdRPsGis68g+3ZpyKuftUT4Ml8LiPrqLUz7F5d4lxWHEbH1TwVznkJxQVFRUcPp5HpB22zRiZu6iSW3/FDmps8biI7jmrPj9tdyRFd98BQsNBZBEBFl2zjn+X2+8fxv6Xy4zzkaIU9y/93vGj4f6Sv2n94f9TP6mJ9Hjhzx9Fp4eubk6SNHH1qsnnViFoQdT0KAaDJlk7aOyw1hMAChEACF5cmnyrGLc12BiEREu1GP5QhhGG1RP8C4Wq3coCSGCSj319jF//XtoAt9W6KaQI4DVI22ALkMAAIIJP+TmbHrYDBPAH5p8KtXb689cwMKjQJAQH8K7tVQaL38SkDoDwAARJ3ABkOIABGM44iJAtYQEwTDGigUW6CBnayhJWGkZwXhRfafy2pu4jDqwF25ShQpVrUSDJE05ZqfwVAUZ+7L7O+kXdQXR7maswwqDVrA81Xk1w6+d1Rg+3GHm9q4LECxAkXRaxpCbqELSggA) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: bold; - font-style: normal; - unicode-range: U+0000-007F; - src: local('☺'), - url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AACGwAAwAAAAALlQAACFhAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADc4oGigbIByEAAZgAEQBNgIkA4N8BAYFgXQHIBuDLVGUcl4kgp8FdsP1ASYUGukvzRPoL5aUsd0c4ERYiIHJlYyQZHZ42ubf444jBJQSx8kdRm5iVE+MQgVtjAQzVuoy3NCeLku31hW/Ig4//skDz+X+jQm+wO+aHFka8ca05wkU0AYOdPr/T7b1/3q1915+qvU/NZfvdGOVIWLAlDBGgiERDEyIjjOCoXFCtt+4xhAnoXkimGEUxZQHMdA0YJPsVtqEYd/iFE7u9xNbBt/uthx/R9T2vqpikiFE3P9vzS/7buBMerAzAKTweAI5xo4w1dUdqNSpD1jJEGW4/2CGu5MhxW7YjhzhkgwALSDIdbtq7cpdYVYIDSj3+BVO7WNsifMVPs0uttnaqV0oIZgF8z3P7gBAQggEgBNVVFedXF9X7/KxFVc8zyhDAGQIZEGGIIohGDYIECOXBeM6aLDr1r0jX5Rg0YaNu5uBGMukKfKkrn5m+rTykbR/8ax/uJBNCAF0pqx5S6BQo88eM655AX0K/QZQsAF4gjCQCTRgCBwA02AJvACfgd8oKAWjeFFCKImUYcoRynhzXWVJ4RyWRRVptGWs0SpNjS+nltVUVhQVFzWpisPaDqJf6wjVZdp9jLnOjVN5mAgVwbk7Rv0q309JkKo3L5xXaTNPqqRXFpZ+r3h/FkJDUa1ev4bfgo3U7Q3qsrqtV6sQgrONB8P/ZQFkzE6/dQlUAWPeu8NXx50+Df+rrYNzOKW62KCddBWquPRqORYm7CwQXU7RVKmKO1vTSds8e+TTsbxNpi5rA2Itb+0uc1t50v+yfMeSfKeJYv598fjA76nympLqH9mgZKWrtaVC89/58AyTkm4D0gT4BBAEgm0AAimkBjFGUBcoogI7LPVmBYbfYBQNXZghf4Aa8C2lirKXoodN4Gi4Ap6Gr8Jvw78i6cgQ8jbVndpEfYZ6ooXoh7RoWhftIe0buunsTN/G+IKHMg4zvmASzEhmKfMa83sTU5NIkxaTfSYfsUxYmaw9rN/ZcvZV9jsclOPIGeE84/xp6mQaa1po2mN63PS86bJZH7ODZnduHPcA98KbwNvNu/Hb8TP5Lfy3BM0EpYJ7wlbCCuFjoW0eZV7xF406ElUvmv7/3zZ9pWmXpmdeFa8mvhr///BM3MK13nXc3dZ91F0Z3TR6fPSWZv+StGsW38zT7JTRyUg1VhpHzf8x48xvzILm/9uic8spLb/WohtqNU2Dxjvycz8Sc8/42YqBerkZ64yfBgUxSCE20zrTkr9M2a65cd1ojEWXJ1WqJo0K1yxd5kwrD2FgloKpL0SYypNXVjA9d53a9e20PhBRKfSFxFzDkEufIx5Z86rD9FYRdhYPx4lIPc2HDHmN65+8b8xgIb4HbAE4/L0vsTsdrBjQzOlkVXeGcj3euFJZF76Q+ALi8F1rvH/1Xn1u8neokw3kUKJTwnT7C0FsruGc3+m7J/9KeHdB9af0icGLArzNlbdh64LF8NMk3WPgmDj8hWv1ev7zc01fOz1yb/D6iCOxswTRlg7uR8BF0K1Q0F836IJIXFM4XXOdDs0QIY4JX+b+sijx0FRleDdO6WqPPJVzubEFpy4xenfNlbtQW3Ea8cfJdrkm2FsMh6LNm7fdqc5al6StbTxUnKyarcOMBI37ZOWlK5VVE8/16DlpslfPi+MCZlGTlCWTj8/9NB5PalbkqW2aPIIWVZIMGOh7tmePSfHBytYTT0x5m/YPQfNUW5xax76wcx/6JFmATiYwL7qcoz9l7Hh/pma2+otcC6LCIjtHG1bHJycUL5vdXtRavoGiUcLiapcduA7P/jFfLfy7oWAav3KBXZk7/myQm+hxwwikbSxZ7mt2npQWGeRSZv3k9HmqOiU2eLCvomA+2cgj8hJXhpd3jmCMOGk6pOHk6NmBtBdVsyYzARKBOHEbtyWCjgGBBA7G5POMgsljBNlQiovk18kR5WrjXNsa0zPAAOkyxVDAUGAYCp5BDJKQPjeJwWXgcR2OkLuQMoK0Iag2JrctWHqMEAo65JfhbOZEKw+X7uppcFDJsnjKvTtPN944bAcuZKffuE7mbzI9G25CT+uzKsu4kUyPp0STkqFMw7jfgSt+J8xxQmITuQThAtcguUIWSzLaxDXywJlIlCKNJJDohSglc3n6faZn71B2qvY8X32AK2HLwRHPfFb2+klr3921vxfXCxcnqmMGe+gajvgdahq/C22rLsqKax5IHDgKiRNHT+TN/2ySpEHtuKbGnWrM+kzauS8Di1NUbkMg0zy3LK9LjjNh/Pmb84wSS+oBlULhAU+gCEMXeomOUHInd0IpmPoMiCheeo4MlK+4UFIE0f1951ZLZnDKts9I9KtktnnG/pRoMSWONnI7vP7OiflFXPNaOgfuBZxgqIeydN3T7ec
9wL5KIf1UivhsFzXq4HoKtDZ3Fq+PjS7yWRaheSvVesihSsNOVar+FJIJOZADSQg35s1raWlubm/d0EEdDtocgVBEIBLx8HWB3fkKWTSPS+40x/hnDrt19H72U3dZfmUCoXlu41ERFsodquqRE1mWUB7bXtlyA1Uey/PvFT/TypzZt4ADuZwY02dmsv3w1Kk35pij5lBjj95IAeqG7H45sNq0G+NziHN7z9ZuuyBuGyDR4Ni3x9fhmldVUGsZ7PEyEn9/ri+swJDEMa5/T0NhoS/EEJAaQorFL16jyIJbkA25cgyrbNaK0047jWl38TEtKL33jUjsWTUcjyTFlX9lJ/UV98NTk4lGtCmjHBNmTj2caY4bTx1283a6zw3repIFb7jFPDXaTp3b1dhE/Aw1wwrXcytsa/GWbcM0TxlW34amxFETwHchJ37xSHg8+5v0gqxDmFxHTmxRlxJ/CFiWBEHdl7/feRo/e4PtTo4/LscolEzInFJMqhMU/LAFPI31KkIGr4zu1H7oUJcuo2++IP4RBX/c5ejdmtfSDfA7EHzfedPSyb7KXpAbpZArtdAF1wsUV+CADARCbVQLTdmF6sk8x0r1ZlbICaH6V9jr8JC3FhhES09zHukiR4DhEMP6oR/SotQwoXuN6/uuP1W6wOp7mWhE6zWBOEahvUkez3t6PGM8acgLyxZ3in/bu8fkP783sE85yHELcuPq8RG7yxafuv7JX2heOFQbi2ueWmT5FFzmKKrDtwEn9BiXvkt7zvnJ24KwuK5ETU8+eGIWjx9KXZj/cvlpN8LAgjk2m6gXCGr/3OhJHi1bkh95PoiBvfGzipKha0Obm6PEjHeilBFYj7HXHj575+Y9U9sCS/TDcoUKaPsYYrHliXW1SlwxmySrWsajq1W4piyodzZGQrnjh3blnhdrDvCkjAIcWT6XbcF307a7e38fXpygjh3soReq+9I8vUu9LUxUhVwexbqMEUpIgwfe6X0ogG81/Vumwd8Pkd+hFlSxtB35F12Y0jPWEO3LQKxjWoBVykP9JtO74n1SEE8eKH/khRhkYaFZJRUO7K6FpNapKtWPcsT82vQSvPu4dIWR7XqC1h281D6Onzz44PCfYuCdzhJBaPu5Q2vwa0nsTM67O5bd9/+uDjWp374iU7Zpbl43GGPRlUmVUlN0ffU9ZXjbj70ljouBkhooAs7zGJbku5UKENjb8J1nhN96Gy7AXQocBwPmLx4MIA9jvvjt3brDxRhRhncna3LytQDI+VVqsZEPw0otUNwTbAqj8lHGRJtiYN8Xo3X7lu5+0+2qp3CqFu981Nxbg42a+/gJcbwV+1GR6vECbIw6gxqGAX6B0twwpwZLx4gvI0WN/7AwbpnJVTxM3zBK55o3gKUAWgOO2gD0ZU58EAWnh/dh/yC5qOef5tpv+vN26+JtF/B+FyjAoJhGHUerYklApwq6xyZNdzJJigUeboLyMvHi1vshI+ZWGhFT8/gtwu8oCDtVH9Jl/CehUwe7CUwAhHTkXQX/2bOLM8eNES/mWfhdovoJcpMv0SmCRqm73uiGCAjAq3lyytS8AfXn145AAJ/XOBEXlupDSAUYJ7tTLmVOs21vYgR3JWZ7AFCbQfJGHQ5y+JdDhDEzmHSetl4KlIDzhblKDtPPGMNFyL0ILX4O28mcaiqr3lgilgYjytATPAgvfME9ISiLsPaF4/gjp/oZ/wlpoi/iOLG/GR7a1k2MyU/JTKiLQQfvzL7G6Efm7YusZtiQC35uSiQ2+dIYbX1EVoiD7wuSgThT8yqZV6GRpQF8vJufi4gofIkV+vI57fPd1KCD+kIrpwOt0wrWY4S4m0ad0CxFkQOXA07AUSqFf6b/0hsoUP1HSipsTl/Y2v8ZQ6Zq1bZc4lOPhH3dLdBNNSeyF7fIiu347lB3dJiPmV0p6zt69xILjx9BTTj8R8mzcvE3/+gX8qHagJ5dHRXGKb7jt+85WmKw5fSvwYzpaXhf2TIScrpEv1Pxu7PQvEXeVUXki8/1NPWgQtLqanIM17MbdUohNMWDe6rqj0SAD7xqfSFcotS7Qs899I/m9CsgJOIe/+vmcZJ6GCYR9tthS9dKzCZRf0qcF5jm74PEmaQoIqZRxzWvBbzwjKwOYOWunovEKPWZwHX7M/YY1Rdaug72iqDraoW142F1Ts89i+9zFADBr89C4y2JeULaYQcmW8CZoiiyNTnSJI0+Ntoi1vzuniRqYgiF6Dpf/y8JnbOlHlJOlbG4Fi948WT3A9BXOYOWTjbqHuBJmUAyOBcRCMf2dZQDtM2Stjyk4sGpaV27JyUbIdM1XVgBR+FLwaqFKYCzQLElacqJgMMHBomekCIB+50VINDo8mDIiRxOM4CslJ66Ykh6l4GLDMjbUIqI27Xg+5biz4sXb2eFcSbsqArj9j1nMOoBlsv4D1sooyBrVuzv2dc7rbI42e1UbKoLCD865cekWmPJb22nO+bp4T1PLoPuglP7++Rk0AB/EIz207t1/SoAUbU2UOkOaJ9sKXPiPY+EI8At+GDDsDNE7DLJYkf80puu62fyf7uY2ya/b1w6z66kdj/Y0U3S9t3IlPj1fnBcZD5aVlAW6Oo2brb1oKSd99ONMZOpow/8MA8U5qUUzQhxwefetJIlChuTw/k/IVRwPa3CPiA0T6WV6wVlAXwccJZCLQmrjEUAAu4EgRnqRYy9LDqPT/HsPibvB0MtxwgtO2NVlwn+Ul85MRcV4jho6VhrTnn2OiWrdWbDNa9aG4FfZF5A2QT0XIiiavnecB5psv3RVofDZJITE1HL9YWFSDtk4KPEFcPJfTktv05OxjQ4Lb6ddTOiI/cM0YROJYeIN+GYhWW4uzpwPZdKDq7x8ynN0LybT36PRupDzls6CbVtyfRPSG1bMAAOd1yDO12DWI8UIzMiKVdLTt50C2dwixXkl/LSbiwEnJjGe5LeC8dlAidVQQts7TL6mXLwc4DeML8F+YH6KSpouwp4Y5ghq4YsOcIxxqrtnlgFCQqlo4wCugPi5Ee7WA+hUf5+WrlHlNc6UWTp5MtSTAQpyFpAUEg0vXHVddW6JJVCdL2et5ajKSc4+vUIOfDPH7DKoxzZUEoTXRDUuEBBsUJTei94oB7JpyjYcIaYnNkzlLIxzrpyjXLKHdvVKmxxCZX2qr9aRTVrIRMgGVecKsHC5F8XqReB03oxLavYUYT9w1xcISw5t/2ih3y9vL+tv5sO1kb+/Vf9WBFwIwgobewz8PXxcvzMCHbxSuEvt9HdiyPafW8uyRxtxaiR0MlEJ+hxOhgmmcBPXrq9g/QztmRTDeon4lMj6hKxCTPm7hItKzSwVPlWISeTFwjjjTrdE4gCl3+1+kx0Lgb60UWXbu3exeD2jOQiFfFdnhrdRkxvzismcFjErGE4SOfl/QtxxCbByNiYwbN33F0kMCgu5zze+9KtZd1V3kGHqkaqHLoiuywMazv9Csxg9Xflo8un08eeJJ7PswfvtLlCdDcF9SEJRVJSqA28YRkuBm27qa0IJmQYtlW6M3K17h9dWD+Kj1
fI04U/zoeJ7nnjb9rnu6hRC60MEbOa0UlxZF72eBRGbl1aUZypH6W0Z0YzuBnkj5cS7Au++5fPpo89j9OgmP5BXlOou8OoIISmhh23EVPl8Fc68W2WvFOa8/ysmXCdTRib3ZqHdRpfUm0qgv97C9UpINznA6dMCunagpKMJEEYIGJio0Fe+EG+2HOgVY/VHUm1Mz8WGgZqFHLfgcY6J1AMyBdr0bzuC6pKQmMNNWKbqCi4mj9wYzF1Cq/eR32YHHdgGNYypj8RJm1rLjSKrIZVEORnnkzehXuR8FRlF/CRJO8wIbA/e+bncxncWYyZXenad8yexRqNEi9Y+hSD3f2boJmqvRPz8Z3B9XBecaYuCiOX2LbkbmI3IuTls6+PO09QiJj+dkp1MtZhnP/Rg0slIVPLzjB01yEnE9SGoXsC11rjsnE8rFzFoe1eT9UdOjmok0lvABafJcIrhhYB68mwJ6fJ45pKK3DsE5q3XLn4KoFFYh+wVvf0kLodlWSPvoH8VBeVDqEB/YLZzhTRT+iPSRurQqZs0964bjDgZSsTKpWmZbyCiurUz+a51khLAmF5P4Hxsl2+b94N99NfyiA11RJVHO+kRvSdNTezuGDqyYeG/pjcbwXJJjyiVLhMpZzUXm94LFx2APHZq4gmkf3z9zBtYXDi9Bae4QqOMJJwGqVapgKEhisBp1qM+swHc/yteEc1aQxmD6B5VfOOjxzqSJnzutJLW/88bJ58lbZ/q0pnQX9lT5UStndIScV6r+hKhAu3jutM6I9ftCnkaZzFE20JgaTFLdkN+OGM8rlM7M1FfbUtxE4RH66y/nWRj38Sxer5eciNUjJxeIfpXnffPTEGgkkfRfuEiVp1Oa6XrD4hUgrEgLi2nmsen0oud1wN4x1ATnySJeH5jESRgPhW4JAPebUjNsVTUr2RCk9z1R3ZIb4EEmCn7zwIn8mf8y0TBncqNJhaXjy8KrJ0863klZJShtdQ0SHeOXhv6enSovOoma3KG+klq8fEWqEUEJfj4HA5zVGxHbgTfao8mbxrr0VYL8GnWfJ04U/zYUL9IryGasULPS+bGoRW3HIK10syPHWeG0+pzxc2h83oeV/d2MZBsHp/uVMdTdRX0Q/PxalN9/euuHftPt6aJ87fY7uGNe9sxtIHfzldRXy3ma8UygBph6CMpfAymue2Svc5YOYfhvy64xX5hezw08ypqzDvzB8fnj/QmnMbP3+BXUyN+6uTm8TdR7XudnUaQt40q5ft7qwST5+gjExIvaBPMsaPofY955/3BANeWwKoOjXf6LDb1JBR63yommUNzXIiHvR/+o7vwK95eM99LH5RQVMxNm3m1U9NzKK/hvpRVPa9uhXdhZZVOSPDoM/MbuUFnpATdfqPAfWapCeCNtUQH9xzF/bc9GL9nUl/s+YjrrZ1FPqPtp2YML+6MhPrO6m81sRHQvNig1fFR+qfYDYqxPUhqFV61oaTl+1ODsqdtebayYAyDTjhEFXDCyHedvnHlz977Omf0ymihh+HvICUo6ztcBlZBonfIJHxduijW8sfiksubf/BELUs4m2IFkMMmauTpbJ8o3yGvEbpK/7BC6JgDhWkqpVQJBGXkYDzjKDa0AJSEA+KPxDE0XnaTJeZphIhcuBCzugn7/c19DfSZ2bUVxViX3xJubvE0oHs662qKykqO37tYy+x+Q+5R+4Fa/jK7y45V+w35AbQtosEEQTx5X3iKXQiBYyBw1iEL/+d3pFnk6iOdt0hlwhNbS9ygFYuv/5qYOVK5rJ/aA5t1Zc8fr+S8mLtX6f3BN5SjuyO7dkPy0KjgktK5FeiSw4cufdKAAEYlqBvTCqsHKzvqSE+/WBAUyEmtHYGkPXrKVuFfeLbZb07erHpqbrH2/eZhHqy4XepJwUvZlycvA0/+8t4/rIr7+NrAzb86e8nNgMsPHAiLZNfXWiwX/x7ad/IASJK9+QWb65rOFhHbHzjQHW5OHVFlpFmaJTvHflTR+LCXT0b85Qj61E9raG0tdAo97aCg4pSEomTjyY95j8mAy251EOrTsoZGSo4uJWlHazrryb0xwPv9RYqxeO2Zj7JNCZ/1JKUJJ61UX2oGtdDa5v3N2nECSkt2X09O3du4XHi2x8lVISA1B4j0ZzPjIdmoj/u+W3Q6oaHNhZBN4KCaCsKD18QOaUeoofWz4ksbvjcI58on348MryJa2jx//yRA4PKVY5MLAiYb16bwc78Nm91op3Zmw1zxOJtZ2YfuitPFJ/aRYw+Sp29sKpcjX2YOTa1QGxbVDmfhmnNrYlBYEPZ1jQ8HD2lHLoOxfyXNJs6KNw78NT5Rl7YbIy3Qptg+RVsTjk+BQXJ9VKHulFPpj5y4oqlkwNxpHGZjsSpJAFuVT7JNxEiLnx34RMPipbf/FU3ESnA73GIzIxBL82ltVwiruR1xBLEE/9eMtLWJh68LfXhEmNiZkdOsCvT2zrchhNTjEruSMDtnltrnQsamjJq6ln9tqTl3FDa6ByLocZzNMTVQ+ucrCRrTEZozRv9Jj6qlx81aIvVvyvUgoLZA2HDFRB2TTRONlHfCxVMS/4aJv+KEviV4MqV0JpXAhjFtzJWvBh9Lbkt8FEkflUyeIZK8tFRaxlHS+0kTtweacQ3KVJM/FZtZUCYKk/9PBl9dv3s1TAaDJ64dvhEx7jduDyYj0058boJplj5M9VqoW/pseRT59x8n/W81IG1b03wt5xa2mKdw5bgMp/jzdhLsLpixxDcN3IblooBidO6ocrH4brbjCYpnvHyBfZexWhE4AbFoHQIjooPU7VQqhrUonODKS4kyrJK4UVyFnTyKGo6yhvJaT1WmSD4Vftm1nOp5uN7xC8X3WSVAnJNiL2cHeNjphqWJtlQLLUmyDS1WO5/4A+TO4v/U+GspaOtek+x1f3lJh3NV7NUSFVDkWQO87OrOfz8EMBLBwvslbHWkCBWr/xuSohZFha2BWbT3EDvyxr7ujRQuD9x0JRDb58g4NCVTBvOOS1YARlPMLNjbL2SsuTtKLXkDZhAy9Rdu1z2QX4WcJHSKVTEiVfAbfhhK8oX4vJvuA/mlgXLQ/jKDgii+SZnQ4sKlyn9WeMDVNY6yFP61aiIYkt78xHEv/36XfiyBtsrWL8zieaXJx8AhTof5HllskiU9uIXAA/N9yJ39tL6DO76Ozzld4APUtilARE0TvJ/BikGjgc2y8LYuEBaDaW4yoPX0cySwYkQM8iGzsljhvXv5T7DsFQLNsyG1P9X2nr0RPaVQJklVTCWmP290NSiIaPBJfM1vhvB3fQTes6IX0/ySwQdFnbq1jPwWxOfvaQ1tC5VDbYzu1/Uj0rVK1QNbWNadJMCHFPH7IP+yIagZ/w08LFq6kvKoPkiH9OzaQZKpnaG7FGOKRSM/ZBZFjZDRjM9FN3wC6detMqJ3pqv+dwv350mzjWel3NLfK7pCQit+fcZAIKGaScE+Fl2EGqCjqnDOFaAvP9jGGa2J92RyhOblRdttmWUz7TmP2WQYyEHeA2XnGrbN6NFJTQBOWughl4iHDqkj
P3IZtEYixGsrlZN4kdEfRKyv+Z0p4n6Omh/wilvsbBglzmbXpjJVdQ25GbPNN4lNLXoISU583AN1hNjwnZ5VZGGG7TNuGHwZiw+oYdv8cHa5BaEm1K+OrN65D7D3VaKxbQpVtQehrU6FaUzII5en3J/QoohyZ+nOzcy+Dwk0Lzm1q4rNQaol9J2fym4nqI8G4TsjOfW74eOI1Ix6HhRCXGrWMPrnNi9/s62wdT1Xi5zsO5E/bpIXSuMZgNy8VrRDXf5wJO12GClWA/h1ZPA8DuMP+xlx2BYgLu58hbsXLAYdobwXXPlyKNrfc83TT89KloSG9d22aC+BrhNb3S+KBLW/oBqr8emWOTEv5Ne8yldRahEGStA35BUWDnUxGkPr9+gV35L1xmycuXR/IavnEsv0ed51VenjRqU2PAZ537zouvvofpd+LPdx3SwtVQxjd6R/sD+wMfz6Vv6fSK/WGnmvrJo8ZDUgvCeKZqh9ppXmpvRbVZfufdCNqfcksGlewwn6OOKsyFfAHmlg0siYfjdYPuYn1KI3oCcT2fZPb4P2QKE8gVfBnK3fo1/++n50fk+PdBa/7nQUk0IsQ7Je7eIO3En49C/SjxJeClT63ML7Vl1+Uuu/E8iBz78+fYVHQjIuA1syejJhUf0Fe+R3Gru5rIjauK7r5kzPA/QAT6as488zbrwdIpLKJTv/bJJW/6Jin2SX9XLt4u5oL1+VMN2VAoxzyu2ZqgiMVfa6yeKKvaY7xXUeCLqSK5iZBjl1jn/T3oNV7vlDvIHg4fQ6Ed6xsaSvkZ3I57r8Vrcd31wD9Lf6KHXWxUGt+Qe91o/p+uYVXY8dG/MUHJjMV+Ub0VVjJDTlBHGOrpxhPEG86HJG3vmZ3p5tRWL9fDw7v179h6YYrFXHQUSCKLuhAAIYlk1pUAUK8vTFapVtFZntFs3x9+IyRIOzGKSMiA65Dt2xaxwcP28KFCynEWRtm5fAg3K3hTQBOB15kU1Wkj8xj9rqoOkwrZWAcvXhtGDQrDPircXoAOBG8RhzUeYBBmUmmx1xBnfAgpgAXOIAsVRByYlVYtCCkCCI5VAsy4UB3loUxyJhj5mrNQrKxVa+3BxtQSlaZb2PMPWoYihUdlHQkItJUJYUDZ1JONFA0Ewk51eTFtpyqESzYF1pdZR0ahTqsBbnVnmqgMTrJI1SO5pVEgVUFlOW84hULYWpSEapgSFggNLUTRsuJwZLh4JglQI5QkKjV4BZ0KA4QexhBKLacHPBUUJy876UeIxAaIAZ0gbbDYPEt5BVRA1HyViUV6HIEgxGDhAHkGWX+wNoGpIujIhHXdPNPsKEJStBRhhELQ/ZdZMYY4ymkSu1mxhKfAwf48i3AQdccwyCGLr4aUNQHpm8P9c/woCzhMoyz2RSSzToH/oMHJBLvioo1x91/2L93cP8eoXxi7k/Y4wSg58fkVCo6WcFFlf4wr/w7Le8AAkF1Yee44AqgGdCW+GrXDog4nruIh6DIjXmRqiIgoBqoqOHEo3IAWyVcK7iqHhdTShK9yGYOUDKQ9rUK2aYZXsBZWChRp+Kgu3d/47NqsECmywEcFKJPASkbiJYmUBIQWZiFWRMJoQAoWqx6FBNeL3LqMgCskoCpcgHr7IRzrqaMYA5pjNxiWO0Be4FZCK2DoMyOvjQLsDupOduQH17T0EiV4meqlIzMiZcmkL9KgVujgL3BmRtkdoAYfWGA2HoFppQPTqDI2CodUxg7fvANk5VXUInp9Uj+CMVn1ibwQ7h0qslijtySPVa9CuSaUKalpSp0l5cOPOr+UD9jPjopiupeTK2iNrAJY1XYNKVNP2ss3qyLQhnKM8FksNKMpylIkWpUvJkKG3JotKaPak69XRhXtfuj21hUHTsa6CPEAAAAA=) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: bold; - font-style: normal; - unicode-range: U+0080-1AFF; - src: local('☺'), - 
url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AADh4AAwAAAAAfkQAADgqAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADYHrVBocGyAcKgZgAIVSATYCJAOFEAQGBYF0ByAbb30F45gp4DwQRb/huCmiUrUgUZQMzirg/1sCHWOHakdUTSDHyhg4IaCSRbkq2MMpGQkBUdEOXQbNNbZDierW6lbrTK3brVNaKp277941w9w/T/BQC1WZxH/cwWYkOAWNxC+Qns8hAuhckTpDZLw38yO09BHv/PPbfNj2lB/FxuLd7d5FtfZ+mQU2adBGsSgsykRSDNJigpirdj86nh5+DE/b/HdwIFkTEFTSLBQr0d4UrFq0ujIiF3mrdpWudcWq9c/v2kViLPwnmup/NpuZ8XufpX/1E/l3t+SAE26e4zCVgFywUwKVOWlcBIdNFDBQbMnrGmhlCpyZXLn+8/za/nn/PWZ+nhfHBN+MSbaNyldKHMqhBe1njdGDUfgZO4YwCgYDo7AxQBsQhCEkHmPkOtd9+W9y7t/8+0IOZeXGTcrj0gPgFK5NIenaZIBCTcidJ2Un3aAHDn/v7v21WphQNIdwYhEEFFA8gROPC3/3Jyj4p4hus1eUUGaZhFGWSGD6DcDf0R9uwMYN5b8/P/x/f/+Dtfb73c95M02YNhEEknEiWQgp9X820/bP7pyt810hmSIHgF8v2Vwm3FR5KVczK1wdyzlcA6Jwbo04Z/kuugARVgRdKsRx0yVFl6JMSqIu3HdJ08X//1M1E2QDTbPELOf+rNex7DYd4LWxqeQVCgb7OKyFkg2n121j63AoRjKqP9JmuErfFVmFAXqHg2wF/80K+uEHFpRWzI0qKS7Z8UwrInoKQ8X3MfrHv48Z+zlW/7I1axzR//1HldHqKcX7r4gB2c9nlZm8HdeQ8K2HSPDjYW2KU6/vlILCX7nfKOfs+HAAGSwCz4AlVllvh/2OCfWKWCnoCvAIVZC5qEmLTn0cBl0x4bpSFQ6p1aRVm3adfAOG8yvjADKaEVOW7EIDB7KOLWxjL/s5xHGCiCSUCOLQkk4us1hACfUo0WKiDycjjDHDAHruM8hbPjHOP+fGpa52qwcMNsAQI01QZ4YzLbLCizbZqdFenbr1OuUNb3jPJw752o9+116D5MIURMiwxhF3vAkklgzy4MJHSjVnkaOgFS1GnPi4SRUtdBPnCKMqu+XVKUpUaKWjHvoYaZQJppsjyxIlVntWuUrbNdirU7fjXrPcKhtttcu4R+WMzZ4z3JhEGqs4xiO+eTkxSU5W8sOOOGcjjzLt0ccSW4bizc1FLJcFoCCmiaBERoqR1PhdHLIywKHgQBaP0ktzUkRifpWLdWxgE9vYkd0UiFKRjg3ZTFU0rGNju03OKGsiEn/ONN3pMwDJExvbt3FZYqVNLNWvUjxF8+5CHn9kLdKa2WXyCPRxVeEBLoMID8VIbMi4YD0wOiGzlM1JUuIFFkv9TSYpcV/PxCaFpi8x36H7uGmISLlcFFHBTDJXpJYa4FhA5TA8tpso8mNuQzv47XmU+t1gpWRI9xM6XN+E1hiieRHjKaRINriz69vO5tAQkrx5aoZ55Pet8seL5i9aDBm+KRkxpvmroW2pFgvaJFpbv5bwMtp6MxLM0AFDurTtBLhOpdZNKH7iILkrk4pdQ0+nIUNJdPphGgCo5WUeKeoKSTgIMq3gqUr8ztKdO4gYH802NKBqJEcf+U6js3/sKD8Dbbz4GXSRCprgeq6nwIKPBbSZuYba9BRNr6IZM7nW5mq0hmeYe8nQoF3MwzgixU5rdm2hV1sGliZ4/qXT/GLRlosmwLesE32CDrF03hBCwLC2zvB+tdnafNR2btU+8TJ2sZLxCSNVSWbjSy1yp7eIZVNc7SiONlUaL4Lt5ZKGRAApRwANlNPZ3/WFRnyWHbPGRSD3udjGGHcxwUssqNkBXR4RSamKoI0dbMlmKjKwjk1sYV9qVYW+jR3syl6Si325lerJGGKEFVlNMa3UlqlaHVAQcXoA+1qr9UIKJ1ep5ix/XEJUqeYaUyTbMjfg8UxLZzlRne43Ic26oyETPrlhkGv7dod74eH+82LI12Mkj3OVnhRh/ptCnP8lhYPTm78KibQPfFIvSW6RQwEAJH3kAdkAAuQCZUI9VuBAK1igDoyghWoQggDOQRnooBPMUAl90AUaaIZeqx5YGdDoUTOsgRoHW2VXVtWOGNTC+Sb/m2PVBk0uJ1k1blCA02NLgl5wgMszCOBuMqRvSN4KR2GsyVmkUwWm4Axc8pwvcAOuwwA8gGtww6o35m6Xtwa8bcw9gHfgGTyGD+EpvPQMgvXzgOkfqC1IXciXl6AD+d6xaWNeGEte5aaxgpJuGTNirFiSEqnZAvMjLUhbkHZaMG0MPQrIYAEIpiqAZ84xbL5IN5VfMy5K8Wa8dbb5yKEsTUP+X2rXfevK9cEW2Jr2TTuyv3/MOja3bS2jyZqxPWlHemrXdld/uw8MOe5j2ZCP4fHm/Pf5nNwpnw9qbL1KVooy1GjNVu+pphXsrX2HR1fRtXTyVjrDx0wt3vRbXszM9F3r9Sms9/tjp7DdX3RV+23tq0OeHZI7HO5Q+3/2HZM65nYsG9+j0/ROBzpVTdB0Pmvzdxt3m1k2hbY/2c6wffr/bl1OdXlzks3v2t8P/l7h/EcVrNqltkIXqfO7/tZ1e7d/dAvptq7bk1Pdu4/rXntaYo/q01f16tLrjJ273Xa7Nvmt9q2T7R3GOBx2tHbc6tic2t9pn7PKeb1z7TQfl+2u7V0z3Dq6jXWrnRnnvtPjZ494D/PsAM8Mz7dzQryM3lb7bvC+MfcXH63Pft9ffPv77vN9lRbqt8KvdV4v/3X+bW4vB8juH/b5sc/WPuULPfpm9K1bFNhvbr/Cfn8tXtR/Vf/SJa4Dlg24saR1aZeBSQMPD6xYFgzUBd9avizkQsh3K5YM3hj6l/9FYb7wtUGbwr9ZfXfEB2vOjXScWj1s7cuLgme+sihk96vU0GWvel5DYabXftm08vWm1//X0KPeOzMlYk30v1HXxZhil2n+iHXG/hqzMHJ53P1zQ1FB8Q0JS6KTEuoTrp2/IrEkcTgpMenBBSemzh05LvnJReelaFP+Tnwx9e1L1qcZ0pcn6NI/vCwlMSPzuaSdWauSx2RN0sPpZvr/aQLG51dfkf3WNaekHc756trJow25P1x3xpgtee9fP2HsrPyegiXjkgu8hfj4hYXv3fh33lqmjjnMHGHeuZkmprOfnxTLbnneUVz2IQhgFYTMAj74GL06i9JlICgJeCSpdAEENzMMd0rJFY2Omk0ej9DEoAtFdMiwx7OfYDuRe+0kcDNUBJTlaQxYBfj9w4c+/X1y7IfjB6SPZIYAMrq+GmBoC5ig7i+6foMOfd1512cLi6ft7vzRIkVJwnuD/Ux2CDNDFRqq8LLJQkzcRiyu0vJhotCko3/3Edoof1e7G7949603kxAiN8UEhcqCF86fOTBzs1ypwjM3yZua8cwdcrUSb/refWvHZC
ndCouLxYOb1w8dtCXa6n5kes263IZctHZ+fbkY76ZYlW7NXtVjR50FfBcaemzQ9XUKCfn0DOv5k+vyMvHE7bl6DvT1KdnQ1ibm5w2f9oDzp9U6G6pLK6lMM4sD37WNjOMXlwxL7bCmbf3+f/lYH7g1T0MHyWNlUhYC6dz2L3/OONZ7XFJwcP7YR82/+N52GchVzVVWtkEI5Ql/QXS5Mt0asg0kkivnW0PGRB9EmMnNUUHKSgRip66xRJv2Fz0UlMOjnIhRwShZ20SE3kJvl41dBiFZPTK2RPWULyE4fTXPVYysOq0aH/2dNl6yFUHZ3QZq4S27M5zocpbV/VEYXCaXMnVK/Nk3a/a4v2mqu/3SK/BOjbhx9cln7WxJOcuXqOeeTB9joYQ5RTlHbIgL0pmgDOzJCQ1Ly9nVf9BxXw7UDMGiz/6dOcTX78c9bnos3e+PZX7Kxt47qr1vNQTFZZIcQgvGEJRAaGPQzKKUHKFn1Gz2jArNDIZAyICUGErJFno8Jv9cIjOdITz2DLMqqnOxObey3LpfUlqI08U2Xpj3RAomMYazqxe6FQooATWykc0fbFjpylfHEZjbBVvY/NUJANBhTzZsO8eDgOshjpJX84VZYIRu+OOmrVM3fusX5KnRo0+xB5lnj4WAHokehGKj3LVD/xXseettxB7odKtFJ9sYBY+CXt35i9hjHO0betJ85f7xQ4cf948KuJlWmfb4zCS0HVQjn5BenbsMK6ssEdJsi7V4TTIlHQeY0CMZnlbAceeLnW9XWE9KgdqNmhL7kR4ikoPIJEggBR5glUPBSyKylCVM5J/DCufFy7TPQ97RH15aj+5+Pt27+9cdNellBt1ryAnYf08HqwovkfsyRiR2mL2r3WjD71/wbTq0V7w7iw7vPSsUVlwoQhPvSCYJai2EoM8WC00OUd8Kok8yUV2MqDyroQaqBhsNehnV1ta0tq48eIb30JCDvk9X/eU+G5eks87kbbeO+Xp5SUmlvHTErndA1WFhZ49dj2uEGJb1QIapHTwV0hkk5RiZzd3kJYI7FJN2Z5ARhcO2mGt208haLmPVD+EYk+zBmJL3G/q9gb6gCec5kSxPY1z3ndfzN11Znsa+z+RiRTOeDN5PbmGyrZgJ9QMqhjYILKyC0MYq3KOUHJGnzHKnsysao7baXIiYVBYQUQ1Bk8BxG7XudT0uv+aXU2WSi+drEH/8wQMxRKEjkXgkuWm7xqwygSN7q0UCqSw9TaKxS5A2DkssGKq4n5w2soHqj6VtBT0ubfYKvHNji2rWlF25fvnThUfGGhlienF++HFo3b6W9kjN8q0tu+LYMwPMzru5u42YDwNfmkHMyaBCfAB7DwXz+XRR3EZ6nu9jNfJqK1S7HtzcqPa9ZFCZoMNiY3pUAASYLgIVfDqZ8BwRp6Q1PST2auiign928l/xpo6lwk02la6Vwk/ThgMwbFTzZ3IkyTIKMxSgMf8gDJEMKkQHMHIoWKX1eoQ0Z++61qBtEdyScGI27boqbrmRCl3SSsjZmVI19KhMOGUmT1hcQtHiaAEnqOwv2lTdBsr93JLdV8NXhruDImO49FJYdJ/I297fYgeF2TUC5PPLfmF+PGP71M/5AdRVbayB6lDwYhE8tS/VHvjpzi9CG0uL+LlAu7ar344Xr/k5Iiu9nFcOC8ZhOR9YVL3gdN4ZJqdEnICmagN4YdHXAXe+keSxS2SZWZIWaykaNRn7HYarvx4U30vmC4sQyscpDiIVIAGrbOUjeBRLGVnKAUF6Jf9PwnToFxAE3sQHIWDcCPMrRLfS01+s136FDZW/jBPkcE+FU9rYV1JhTFoiY50NCfvPggmwA7+//Q0cdlNETgyaMQK4JJ942+UfDI/rI3R+tolOF4nodFOaPb0hA0FDFmQatvEcE8RUUM+JJGhMYTrTi88BHTQwmFaUz0wwzr9Tnn1bMAOiI3MjvP1wACGRxSbEhqBpUUgCqhpsUCZVssIlseQaJExRclMbot5iB2EexKqp7Kp1L7+es1HNlpQbSb0Fo7YDXQNxG9fNLiLGyp7k5Mqb43qnAvx9UWHe78lx7CpxBhNOmMnkJ+ARhSKvEF6cNS4Iw8emZ8QL2eoOPkwNxGadmqj2AsAlh0cM41bmx/ehhNzqNj2em621m2Bejtf+Vrhh7SBTHaZ/gtgDSK2xcLzZFmRUEZoKJHQXfM2M80dXFEjhydlZhngwJIb9MhvdXSXu/tRn8oHb73H8QnftkvPyWkAoxl9NUyaXro+X5HRfy0ax2Y6Sh8lNf8zkfoeSHojoiXxOIoiabScwbhkO06XsTL/AJZkE+4vr+yCpeJQi5+XzmOCPaX0zZb/2fPumUn5ZjZi+CtF6KGFWa92K4atn38JyIqfjkwI2RdDlYo4DIwXF8q/Odc7jXcmfzew/8NJ/u4WqFWkDT6ePiyeKVvQLNkUR2MOHLiha887JzhdSMnpeBeSYYkcOLpbDj4rzbtnsQ4OUxJ4dM3hvYOVgvK6h06kyfBerix2Ski5SJJ3Cotpz6jJunPhvtoxm1hu2Z4l5XfYHTr+2lBe2OVx3k5vylrioaVKdv4jPKi7tsophXCgWX+iSjIHX9ZWtLiYUzM6EjBXnDBcVMuGmvZoupYZCpC2K1a9jTF+13sN+D7XeeTWBG4NY6AfzijSfaHULSyBw+LRcc7PGgNDos708Wv7Rl68QcNLL+FLF6u7MhrsLkmAr+VwLt6htD/CbSwBZnEPmvMrFHNwn8/u7aDEvuG/P4EHsbzEXCz7+qd1l+nKdWGMd70RMA2xiNrIyaZcuGK+40Jj9U4KHyS/2Wm9no+i7ImGaqDoDhC7qJrDuQRQVKOTd9D5DG1NKUyVw3Sbs9F3xtiTQYyjhWMPhBxl3XPSqQOja2R1EY/o1ziMTSj+EAKLs6t0IPU/Rzu0t8EtdEkpwthXthUyIO2zs4wx631sPzpvqjQZFMy7Oct6yQGCfFvIsFZCQlpSb1X3zPsd7rLX9hYpra5OZhIPWgj3GbdgbHNVJ86A/ybpekhJaMUjBJDZ0FqwwbuOgYdNLCQV0wlvhZO/3XvgomJioWw5VAA4e/LygDi/1BPXV1fa6fgpu0vDYyZDrYlgD11w/hXvFMt9pbbmqwYj7VzETl4pAKrKQc+jIbRuD+hI93uebPXTJ7vDwtPTI8MnMWff2uKlzLA6HwWK16Us5bCmfixjlodDCkQZbpJ/efvECcV0tmDdbadB3e2w+FKSYGFfKKMQjL7Gmv+xT66yQSRzv0dbAl7ELgkaeii6znyLiuCEFiBvvXy546rW+Hm0vyN5TU92BuHGewSKeBC/2XNtjQTw1zINx4yhhkbgILFyi1IndL4QDYr4xBTCJBE8Lrhi1cc2Q6JkEHbK0LGtawZ4iastC0iwozPAp774Vqgt9o2tx0IiSrOytkAICiX9sPdSbwgm1uyy5UmtaoKvuA+XIW9MUyTSFosRqcXUhYUotuJApvkaN6eSaPoUyWclsHrzmVHWrWF5rATIhkaoIXLspgcDG78k2Gwk5lBTSe9uEPocQ5PyAGQUgF
aQOtKs2M1pLRHhFDCZiucFLJLbU/j9VEg7FAYiAYQyYQSMvgsio6H9rL1/Cp2zUVvWCE0c6BjvO6epa0NbLXbdebUltX75gYer2VVpK5vPyCxdxRm0osnAYsI0GXkwXZ+tapg9unVqxeWtTS3075dSjxrzCsrLC6sZaxWm4ILk44byM0ipUswoqwQpzOUgHfQXvquh5eOwHXnQ0WsCjmYR3Z8JbMtRXWRId1fQ6MT5fMWlbBrlVh9hih1lCQ87/A0Z9G28WjFDmtPV57CxbzDGRJGNH22qu1EamXTAximjLBKFdYnbQNIoWb+UpRAsmUeHETohmNwgCLRAiCcJggPBJReOg4RTSD49Dch/xnca2s0ged5odVoNBEpeqdGmEDMvQHcwSqJD3eFZSlzTPIIKYuilj9JsXGw2V1issPCIoTjKDj90+cv1AvY+y3pt7g18MTSEoB/ogi19RNfekessajIvj2QbvgzBM5t9jD4eCRhGJRZpE/IthTEHKHPdo7RY1ibFbWsYht0Vx8WvnXSeD0th+2GUGEx35IACJF0wVHQYhGZbc2Ib4xh8rFDM0mrgIWeANQm4KWjyqEQGiL6Zotz5KCEj0Cvruzv1aG0eBb30btb9xtMZn527iQJwPoACjfkKT9o3ZbEAzJB6bEKpPlM4QQMnCxKjwEfu6E7aKGbTT/NfzRWyg3MdAPBjHd8IvxOMwjAHFd9LwMaOntsvNBpwHSIlTlDxOBjygeIAAqYrvbEMUw7dmS8TrEKJlM7FMEoEnCUHDhhQ0YPMpUHIA+59Oct7MzUOTpyPYXddChDm3GekoLh35hTElEirl1At2Bst3eUxPxCzs9W2gQyJKLs+Wmwz4dlLpCZwUS1DSIUlM8BLFjA/OIxnket+uD1mwQeZ+Q+zgswISSb/0f+OxlghiSzrIXwS+pCpmiFBvhj5i4aHfITjxTnzCBpdShUGEm4IZHyQ26c6IlHrTxy8/eB7WIfFQ8fI//Ngf518hOKlF5tXtIzB/gpgk3/cNcpoykbgLKPzaY6mV8xB+CcFNcmpDpLuOZ62Pk3Eb4SExpHAsMSQ+jH82RR6QdEwPCHu1r1AZmrG2EbubreJuchf7upUf/icZniNgWYKv1JMonaFk+hZDarWuAMYIYBuDmQF7pD+VzzevnrVeeXjeZZeHLEaDgVHNMWzsKpSdaArU04W4Esdu32qK42Sv3WQQakPiuNKioAGlHCooJrdygt7vBImwG2L1tuDCWV7RyX7AHTz9y7MS4fHUS5SjRTQoKkezgN5PaKYrsJZP8Fo+JwJq/zQxC7t8+4XRENJHNU5qlRdLdAfio4mtvo2teET0yMcjhfU65f4kh3nTYk3R+VIAe0UD5yvh4sG1OXi5cM6MKPgvhmD5iMrN0qNg+Vk9YPiHoZ73aXLmRyqe1JDqpSTbRwN6kX9YFIWIdWnwFr1KWKuPpTlRflpYnvJyexBzygc/gLKVctqXOAtq2GHtivUOu/iusCb5CQHLJfMEIAl7HM3mo3gbytc8wJIEoruCnPlkACfLnhrzmrW46WHPvvQXx4t0ipzbJ0H2sIou+LigIP+obgBuYWhnyl8BjK7UbF2sEpxUmaOcd6fcBDYMrWKCXWXhdibBDOg+ML1LOjf8gC4dsT1uDIOyqIzZbDwtgLhPq9pWfBzb/HVZoqbleyp8gNhwJOGimnPGPm3akRd2NEq7SxXf54riw4NlUwK0SfvCEru9JYhRsoEjkAVw1Asa0ZtH1BJHq+Kp76GphslniE/qpdCT6RUuA2gKe2nseLsl1sygB9PMgY//4UhXUIdvOkohGm5GEVpz8LSuXE9xUuRcpxYW4dwkz+vXNPVsm3vgRvLr0hWT1XL3yNjjCp5zQGK2w/QvjVOFVdmHa302XmQSvGLwxlKUpMGbVaxKJy8u5eIoq+HpkpkOI4S85ibQfcmW3/049n8NefxE9k+PXe9s6zT6q2HGM2pm0PqInAx7rSfb9/pBRxTBf2wgMPercuvPfrti+OytCZlbMAEpZtC3st6nguAHUwFjHXEjrfNRpPZ9SLRwrYBS0gNcx1GsECSU1MwzBEQq+yNnixYS6THF6tcwOqZWP4cxttfjoBCU+F3Okv0s/TloBdAypvV0LGAaTl7/7WW5Z32cv7dTwWIbzxLVxmtndzL9KYmC8Uz/cpl+YZV7dpnWU9tKZXkDQ4GafmKk0gNu5W9zu1B0zuPHR4CxTIRm9v0p0sRU2VMlTi7hM8CwcOvH97mCpY3PsysvTB4osrSeFpHZVWSyiEzYU7uOd19izZciL3FRNEke+NFwdj1iPiUSkpjerAOjVikArgH0wc9Ox6MGiXCEnAYqu7DTJCJzKEeMTEo4uuvN5KgphxDGhV7cL0ojAQvOGeiiZqK+yoK4xb6pi+l1orhHFyOG+yTBf2wkMPepcupF3+0Zg4/DONGodkSFfEKy9CodXW9Venjj2QjgaYQlasqZyYobyXA4avgfxCF45ufJf3Hsur2U6MqNyqIbCbwdJcD2lS0mMrPWuDv2S8q9dp/D9JVa02yVDuLKoI27+gWR90ISsj1rjkz+BxRQHDf8F8JjD1Rdtwc1yW1dQg7hkwRebnVn575wP4+F5w/w8Y6phLHDtj04Vxe1sMW3v4xlElSqrA60Yw2BekCVmwP/W5h2xda/1cUZZMuwYVu3LXj6HN1gz2zCLrfxGzf5cM/EKx85GEvhEUXu3Wmwhqy+ETZd9wB/IeQO2/LgXGN/Zgb1sgUsH8Z+sGo3cYJI2zQJ8qdUFDarf5bGKNi8Xt6F+DDSLvtr8t+cDIVddB9SurRd9d6S/Cb4YX659pMff/ja6aWYOmB+L0cbPOQZhB2KV8Xen5y48W6o34ztHpjkh591GE0zHYkrD34umXuaEbjaCrYPs97gAJ6CHVtofD2F+YoPQLiC2+fPQEVXuOj5W888dpwUW1Y0RSfjFHjVDsdZKlGUKi9W7nvHf4yoMJKmklbxFFpRwWg4Z6qYwQcxVhZHgR+4rndwIAKDXwzTQgWYJeAtL3G+yECgn6IMw1oB4zg7yFgUCFT7CbzkYnLmKCJgiT66ixGxVpbxiGCGqLZusnCg1xanQlvYwHp8O7npYs72cNfHgv3mK40+6XbtVMo06VVp6PRZR4w9nbyR4k9bLYUTHZBcWFs20nwgn1tRTcZcCmqeSz6YYjmuhyzydrOcgaP7lwlvL+TOMYTcqXeqn5gAhbySIctWLBOjz5EnT0z09Ue07hBFTaJmsjdzzGpx8lOjy4t7/LdTloHplEJKH13Ntcwp/YA8PeL5keXAZER3/iu+jbbnaLOJLB5JjyiML3VV2WLCgIGkCuStLkEfCM3SoNiBtgRtAkAcdAfAPHqt/JINXejbLmG5gzHNDJLZlXHsx82cVMdmYF6vZgYMqrUFlGeewzobf4AKzCZ0sGTcDU38GzXCwv9rB0XPammMKbOmPHVxPMRqQ4wiCuJBFrOgawfaIQmE3S6StsrCIc8WlBjZ7C3TRZQy58sou0rL4Q71/2Ix+Ru/NA+mFpMrzmWzEspi8O+Z
uIORxvIYSSGdSRsQP+42QRE4Buo9DHQFTZZ/ITaoe8RKU4DZa/drM0ITlHgEKcymsX46eoMt6lwx2dfxD3pAENKlYXbCAqwKfW65R5OJTGQWm+FgD4aJZDCnRG46JMNXFPH1Ad9toTKmi6cgcsTjkPRynr306GtNUoRc5eG5Z79CPhG/bSY84NHaDb4jbqTK/co1LGWfV/D57xzRn6r8us6E0miBoUz671q4RfINRX89DqHM2LLDVo6xV9VPRuay8A5/924oLmtN+5npQ8zxCdjtOZOl4QrB2tIu/ZklFqsFlu+GYCgO+MaSrYNOmLFIViarpoD0rtz4LIYBBTsGwLLlvH4TwmLh7VXi/v7cT32gsXzVpI3q+fPqpYDmM/YswrnvQg8athFuzm6jXAd6KzWjQotfA+FxfVNos0TrmxmI/6d3i+jTfM3xpoM4U4HW36XlWjkgZpwGmB6DxW4racYRda2jVVduQ27RmuPMFppQtUD7RFnfOtGkOnJA53DbVdbCdp5px9KkJ3EZmTdWjZdT1XZLcokINxKY916vsMS6SG/Uzt0Wo2frIsm09dSCaUAVgb0d0Id20OKUgmBgpy0BDaY7awcrC29Yr0Nfeclvb08pYVwyeb5HCNmfbSqnwuguNUNDA4L+sirbbLzOoprDP94rdiOfivCQFGkGcAkrb3eFSK79QG7owGF/HUw4gdWzulGEE60kPgATYOmQE97Rt/bf7o/w+3fFbVAvUdJBnT5xhWQ1WStur7kiHC2Wih7iUUQnFMsErIJsPor4Eo9vMXOXo25eERZkPE6NGiUPTxcRyvlbj5LGzSWdwu7/p3yor7Y0PSoXQ0ttxPGJkE7mBiuw9FHDhNJ5MHdCGIAbBJktD0EYbHZWtyb8i+FlM2tn68f44FxJy4LW3EDfEfrwAcgI8w8khYSNOrfN5byT3Bg2b38HsFrOsyGKwuRI1oZVCPnLXkndeRvpCb0sNQzieWFB+0Ipz75wKHl2af8VWhO1Sn08gI6SssxhCLWnSCKpxsIvbK06j4STfRrz3kSg2xoiM++N6fP6kQ3O0gPllMh5rEk9L4Tg06OvVGiLiiBEPHIk+dtZlF578YNz+ozPpFTzsOGL/uBeM9YRWXUwE/SSsq+8LsinRPw5FJ8IUTdVyffKUhU+hstOAgERrztdPEtR4udQ5wtAdvIPhnTjxn20JKjQkm6/H31ZtLflQXxazffrdEVJsf4OiUzAehnKBQm1hbmjG3R9a3DvUhWGtvR2GEtvtyZ8jxU+jMAo7xYiHFwLvT/Slk6vK5XxKfFzSzjZYOUqeVM1WtDPOuNz7eVWULXvylc/o10n2p3tLspmmfnypcbLDe8jycPi7QSUREAu8teQ92byWAf5evg++ituqsLI3H5ubWXrJEfJoVnF57q2QKJ/tkKBhERw8/2FL6cSKtSUs9MPFybDDj62xiB0xqMwa5VWnJGLsCL1/ykKPSaUHg5oYGLuFB+hSj388ebP95OjbhU96rkeHW52r6J1zuykN678yIHNMXlqJ4Gbd5TM7wB0wVM392roaAUby7mB0ZoK4nJ76ELAkbTo4pnbjWF/70j++HWdsBtSaOI6oVgvEHsvVEfssfH/urlpikYvgEA8BFlKAJ0kyC7FfiLwo1F8sTOAHKqvPAwXsTzbet4O4PmjdiyvbVOu+wrOK88dJ1ex7U6ugg3m+h66MGcFzc6vnbgbBuoP6NOuC+c6Yck5lD2uNeV1fMZvI+bNb1RWQKjvy6ha99Fz7lsR6EDoxVrBOmY4tBlALaUbkUwkcMgdeSjl1CU46sxIHhB3pb4tVzBqufGJcsQR6vgoloxMzoIfWGB0DG9wiWmubn+qHb6xiJmIITv63kf0pMAZAr8GRltdAnqcpFvUNbmiCcR9pDpw6Qa7SNKFe6oqeGs82UKiED+AmQ6J3P6SjH5xNwrnge7dKUXpQ85bU85DoMbf7o/j6QRj1on8JgGeEpOvecdgjw2e/tZhmT3812FX49t9oUfBPQru9HTZfgYgXux6tHmTeAqxAj5/DiVxwoXrHdSxUm0iVo8GD2XNOEZ002M23obgSu5fGotc0owi0qwlhGOwUKAWzaaUl9Cavu3J5rGYo+62pEYZqbNgcdM5D87VtRXXroAwUNqPO36wEM+30K3cj6xuxKdN/NP1uBJI8cppkzWtV5euTTGGA7Kb0YNCXF5IsUcn/xt2zgrRTQ+Lb1jvQlSltDMyBEG2jkObwAa5rKnPhSOki2DM4P0X43PIaTD6apWrqm/BHYFiDkPwr57oXP2E9KP5N/USnjG3HgcL642QYGFbvUIV8adC17kztnffysnJ1dzpziCowwk2bybGsdI3I7UKv9usa4P2FQ0T1Q6yKULgetzwD3VOp4w3pf/3QxH5+ZVBdraEP+uz4QLk1wAHRtEv/Ijrblm5aFixbOCwLYUzmV15nQuUXgYTlID9sXzsP/m3w7QbhtbMjO+hDdzrnWQfHKpxOY/uFH3inESuyuBE9q6A4CMTDKgJwqLO/ZZ9hA9pXx2MXLFEHYiY1T2wW5yCv8vJTTKEA7KeaUf27OwzDJm7RcTRw+KbNrgQqZV2cgw3CjiGVg7GKxp7XLKGpA1Ea2u0nwxXW9eVLR3SBN462tXLn8LPu/Po+PgdPCNVaGvUJTjE47cbzkUb/BrCe4CWhL8fI2Joi7QXo5igJqhbMkwbbpBLbwxnboSOBCRsctRwUn0fRTUNMeT7m4KWF2kq01d7mND9MjZ+spKyxmxsILMuP3uxqTdhOJmHaWgxk+vWHTqF/nEp/qUqqOtNEgrrHMp+JGY8hNZ64wMZRneGLNpWFm5ts95Kbs63Q6Oz+lWeFSrIZ0J0binjeLJzmNLMzo71sXQcud/TEzlzZVSUKPMln5LfaXNiTuFaGDu3TRIypyhTzJTJ5Ady6p5dGChS09sH0kC9Erm74pqVEAZudl7Mlpx5Hp1v48ndFxV2iltqmNdFVZFy2mztiMeGKwOjs/G/ZTpiqBScm+DDtdig+a8u3zj6arlPBrXLIXNPeO83HaimDqqsla3Ewfv9u58/xRb5F9mOGJPQNc/CvBirxMehAcpDJ+uvB84VO5QxYlL3Qjl0DgYTX3/BieydAcMOT84YiNz8WUf5rDYTA4tWlxSZN1LVSKwVJuVAqHGASrsxq55up77JN1wfTQgbVAbv1AVPfSS50T0o5PBqypeP/uaIGXS/BdhspLaNPhYo4ydEI9ek8i32Hq7hMFbuDGYL4aHq/95IF/QkQbB47rqpisxrA9/yvfu7ECMaN5tT7zdVa5X/9yV+FkkXJcN0NimU8yobeHGF5ZNPmCpgxk29C+NnjtbGMF8GU6eNrETC39vUo0wbngldjY3cmW8MSgi5qlAhakXHSaeLXcPlUJhrQ5bakGMBP/nFiYjrSM24qXu77arKgGS4DJimdkw48kanS27HqGAmXIK79ht/PjR2gGR9v7IZyx+HnNEzncNUJ27JiX3WmEuHRgXQtbv5A6e/7spE8io2cREmK0WXm5GyJSEiC7rEWFAVJsklMfv
g+sofKroKC6pSRtZgUxyswTwSOUiNz7FL/njMpUQHYF/oJkxzlewLSFhdMQ24n+Vjj2/V+TjVazeF3qK/Re75QF7fgTYvVtUoay2M5Tdm35w5ygawvHz+9+d1OvOPlfVXWCmWr25WAheGcvKfrHSoUqIytf3bgotme1Q4bA/mYLc8BnVc9izvitoYjRKuq5vvzfx8O/nQ4TMVVlTcKw6tEDeIwbiBq7QMtG6XolPVRqGCZQEb/ZC+i/nkh/RlyuEVsSA2VPfVbRhF5FvjTRBLbda6/PZnxr5g6opWXDWS3VSddiUi10WhO487TqAXm2LXsgQvzXPy7xWwnp2AdJyFguGaFhuPMgkKQPVwwZQVt1m0XU28bZrJJ+GnRTt+Kf+pLHbGeVAqSq6ac9bZlsSTPYnKzBE3KO5Vv39kT8ExG8/QxLUDog/O3OAEj8CGnfnuWqCSYXmiHpy9xpfpuCYLtGbsa7bxF23kuC++242fv/3d+6W/SVN9sLhM3L9qcZ6LLRnPGdrTs/RCqG9R1z+SXcnBteH0sFfjBh6e54QFYwF3m68d5sOksxGgPWaHX56tBkNr+YVXUa/oJblEUJT/cfwcyzH0KZEFKBKXvea8nSIf6Gg+T8cFtMwKFvV3c8e1d29syrrfjxufzJLNjRnFk9r6eNOE6+etZfvztFlESa3ZRhdgm16XK5rtizss2B03WXjn4Qkjgrf5O2uiiXokAl1ng85NjDYyu8r2ekFj7Vq+5KnTuFWFScM4XXKiNxx0cYOD0T5cgoga77bdU5PJtb+n2VQzm4rG7XcM7x6BAnzHfYDvuWefaqpbmnHnO1inhpTau/b7zfoWSRoSWI8pjq+rlcN02XZHidBZD8sFy5hMrfPNedKuDP2rgjJ9pzcgi4Yn+ya9FZQTVAq6lvn4YO6ZE9YDQS6hTD2SuRc8hHyW0y40DZiHN/Yb8PoPwWtFqQwNLCnKl6VKZbindqCBRqX4E3RoH3vGPQPbojEDRUhNGR0ZYYKi1LtMgk1S9viG73cEboHzZm3uBXWF8whxE2X9/g9zB8U6b2SBn0MQtw5+vukcEX9M/fsbyXVFiP1jjomssJOC7B+l/ptY6YyE8oGyrLokZ4U+UBgUS/KkSVBkoR/24f350t3e8DTVs2+UtTXN2eGPvhwfp0EWwfiLhx89+GTvkT75Ln9vkNcQvYeIRg6v/bB5kIxvJq4zufn++kcH6ON4gcOMQNbOv08G9wEYwya89e4Nm7zpYRegmWp6SOtqradsDpHyRMb7oZdy09PUmVsTKZFljwfO12fUqiXzOcWhEvkXtIQzGgWmdrBAwhfrRH2idKYAirx5lSCPOQCS3LhVgG5uozK0GvKYGUUpY3VA5ov1l7BB+YHrn2lWYEIM/z0OHinc0wpRk3Ccj35FubB1Bqgk68Z+7U2sNG/BaktxaFApylggnh3sSbGXiuMJ1+2U7VNi2o0fO+zLWArvs+DTbwPhVJJWquo97pnfvPw4M6z/BXt3xeIzg4GdayBxnla3IYoZDJwFdcDs48gIDvJUXX9tQNRcYUFpaadVBOPCMG2hS+oFNQ2VrS7XSYMAyWnJ0pvPeXCu9q8hwyUp87Kk1DIfd/qD+WU4RxRzbPJ/YFeU+7FtBPLpEwocNfZrjVPOmxe6QV2+NtUYBtO7CXXRCW3IF23ouVLx0yNLM++B1U9pU79LWZqtvDlJvMiUQ+3bR8Hzne2t+ekQILi2daLpDemc+3wbss5/9BYREsk7MBgUnHBu+Z8HyEXAFta7fDwjaLmiQB4W5oeoCb6CfBUzRE1khkVK/TwaNCEFfcQf68LUNiu1IlN1AXMS2iiP/6S4CUMGtfI8xniOLtZghjgk6Zm+BA336Fy2KXO5BpFBlMHaPvAn0KfeVPiOhg6iOuZSnBshYqKHBu1Zno6Q3K8SJAJdnI3xM/tbpg9tnVSoHM15T2cacplZzWe0dvXI5SVJ5+q2PTG6rI8D2OUHU424I8a01MEWyY1yva/rj94/VclTQTsZwfmqGKJ9G9G6kEbEhF1lR0WcN1uWDSa/aPrtlxzkc8paMzNLkAtGxrs/dmEOaQ1LS1DRwRVID2pYKW+OV3DMyhm2HEVV4aqRZAnZvWiG4xCNzWK6BwdMI8azxqp2uPPuF2+/1h6lWz5nbvzqxaGUNc9cPnMGD95RtM0Vg3PFMNkG8da6+wct9/qbxrKMLsmpoDWOoshA2MEVw1ZLR3gAE3qIOOjKqlMnYiaK3/f/WQRO9MdmhZWoBop8tUa0Y4sheXszbxsO8XIb2a5ZiZMMavtmu3IRcx+an0z00ll7yo0WVYozYIVPhthW/JS3J4vy2OfHCiWz+bsMw71X0N6MhGhqk7so5rwXL10WW+KlFVNTxPmUk/WDGUcz9zgPWJKgF766ZYR3ijtC+el77Mtdoo4LTiu+KtnSs4djt0pG2a2gDby8NvDbDusE/7SWb98JfgtS7mj4pu4cJSngkO9FN9AzgvZtoj0r+ffJe/SdlRhMJqEITJFs7jutA1BrIHd0o8uYvJ66o3rcCPCXBS1Ahy3zzWCU2Qpggufd33BHN5Z4oXemkxGNgTxverakgMvqVmp1LssI4mePBpjpw13iroZZ2fMQCkASLtyCmncCZxEdH5FtlqSCYfjTPFt2rSxtvuMatit4VoS82CbgfUf3ooGPyU0nP1zT9AD8s9C3ulVGcHhfRhlRiFcAylELOUxl0turGm0tePza7O/ubyfsGUPZvlsYlfBCqB4UYkAqHoqOBPMC8Z0714OAtisSxRd8ftD02885UAbIkDYpuDYgZ3R2KCkcDMV5k0WQS/HAjnFYTBWSiI+xTussWtHsMdTfCbYCcap1f5A/5ozo0LV1mI1nagi/JwS85DxigzHSYxxI3iXCps/djRodPh34u1SziQBhbv0/fPndH6+XTUi5EyS8OMygim0RAo3DHFSIEymOdict6WBTOD6q0qWt3XNquxzm4Z2Z9ad1MEVRONwWKTx4p/XpZBRMt6h0adJCtsnObpPgYzMEDyZRRBHVnDr8IvWhqmpLZ584lSMroLrjAV6qJNPXiXjeLAowhuNtQP6zWZNwqGXgrNejYrExu/lnTugoUBt8Wmugq8Kc3mdMYwpB+chT88cIb5nkwkaQikLubHHujTney0mRfkEtbkRU/wq6R7yfDYhOjxKUYmOP7bGP2T3YMgRFfdzsz5+3dYfFKNps0ZbmLRAvCJufRlGvgNucZllxwYfMTpjNMjtlsncnllT9hmYCFWtWylBqzpbBCl8M/KhLhhe/RE7vIXktIaE5qO105QahVnJlotbcKLESWN+brqdV5iZ03HuopO/Cy/YnM4P6yMddP3gCg1mEkdVwI54kP5Un6lJ7Jj8xK6Y44bTuXHLHTYLECipLrs/O7bFcjZyRwT+JlTnPGprWUXZtecjy8PAXL0HkNS7VZluZIT0FpO97HuBiudfy2Ig0DwrHMnJyIWdlPxAWTvq2IGgvgmGDWnABBkpi1z16rMcS2UxXmtjLMkCTpRI4YolXWZJFKoYIpJH5yV9aK5
3NeRzrG9Abw/oOS8wYRqPLJNKFE4S9YAJIzU498rBG3f6LOVBY0lVYrV4yhxRmU9iEoasnYuZp0yVvxRdKmM4QphRdK3VA6wku8zW4LFhc6182RdFWrCaI59Po4Peu5oFTrhetpsFonufrJVQNQvlIiS3Z5wnAwS2hgnJo1TPgxnhwoK0mxS5XdlzlO4pOrhS7559/GLSHij4MoMzXA/W75wYjxNQPKTQnquErf2r2KGhVn+bm4XlzEun3bVI1K9Tdzz73f7B2W4aE6TpABiRAAoiTF4WI0v1CsE6QhfK7RQArQeSAh4j4J33R77906lHVzLPRZgs23/OzngrmgOUqEciDUZ9nZjB9WeiFPxkDtJsH2pK9JWeuXw+JjxADw3a727cEgN7OU1chdTk526OuLxRbxLykWZkEWJRvWYT6vuInC4aBlQz6eU256aAug9lznAQyyGM+CymjgTf5yLh8kVvc5T4PecurvijcSRfd7eYe7rka21dbh+vrRGd6pQN+vaIijGx8ppU4pNsQTpwgkUzyWUABDTzgNWPyhG50p3s95E2v+rhgIbaz7nZjD/RsFTXX2sGOdqJTvWZD18hKR/qXJlE/MiMNkrc5b5vqSJYUQsdgxWny93/hV/LfGXrIgpIEHeb3e66cb0b6/Xr2Y8yQHhlQUyLTAqZibbchQbIB0SqhuFJQMqTH0C0HCspvFwTwDHSdZ8Fz4HnwAnTOybyI7iVUizseDi6eIsVKsAwoV6FSlWo1VjCoVUfmtDNwwFnnnHfBRZestIqKmkaLVm0go3YdOml10dMxMetmaQm09OjVp5+NlZ2DMywug4YMG+F2xWiLocZjjJfPuAmTpnqORablyVegMDov5oZiJXFYlttuicI6T8IxZpQ31KpTr0FjeAZB3nNYsxYxUSdbEw7IT/lBYM4tAQQShIxNbGYLCMhKVrGG1axlHUeSIhUZkAAESHcf95tB9fzDoQdvIQHw7L1dwj+gb2KG47UO3o0FFuYBqqsP9OUtNB+xGSaNNeSI2k/US2LDJFKCY5a1VN+j806gaxRuxP4LxicsTL52p402WC+dUiY+6IhCBQ5YLUmXNrqwzGYaNnMIetssbURG8VlmbT1t5P7Ka70esFw2lg3ojqnb+EGVe2kIYc9cJtPKC+rnB7meOx7CkGznDc/Yz8q+waTLLMMuAoMUOiZfBh/JbRauIHucv2O8bEOj7xhzO3upzzmAvKIXXB2kDabYdK62zNHyl6uyef8KIs9DO3c9RuyT8T1uVLRPvYVwJYatIGDHgyEpjuBpz7SPJPuQ4Y55+BI9/2tlgLjkiXp7GroQ9vdM5nJNbzqowRV2Kd9Pc7Z0oVEPHHqmc7fDZviYHmbzn/Pn6VEAVAtDQ/l7HES7ugBgROsatFf9pUAEgS2NUFBKIzQAyr9hdUcMY7kpVs9/LqbyUY1IpDjJL3mNyCDzvIGBUBLkUjKhnihgvPBfKMelBOoJbG2FildTp42IxLoeyzti5Mmpf9+siE+bGBeaKzA2lc/ONfDrnxZCAQA=) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: bold; - font-style: normal; - unicode-range: U+1B00-218F; - src: local('☺'), - url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAAsQAAsAAAAAEVAAAArGAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADZp/GyAcKgZgAIEsATYCJANCBAYFgXQHIBuKEACO0xV3IEmSe0c05/9ekrsEyk8lPUoDXK5mnpohngqaBNGKEVwimNYU07a0Dv5E6RObSze/5SH/9u5+lMBf4FlUW4YJhFESSNAWUeABBtb1+7W6D/coHi000caFQieU++/MG3g+tYRpKCbNZPo/krili6RKyB4akRoZMjmgIZ2t37uCGbUBgiAREYR4s6Pn2T1enl5znmmFY0+5mZuCuKkWiJvG46bzubECPNqCz2jRpX7RV40mYMNCGIkLbREy2z3agpeE/JGt88H+N42B3tFIHARlLrGZOH3u4uVrdt5I1e477+ru+My6/v4uCtePCRYeFfr+js4qpetRX3P/L5Hg6XrU7ZyC9W1SGdedChfMnU+qAJCdmu58asZDffgbv+G77vrKzzmf/ZkEjL/HgMgu9Pv3czBkLqsdWFaMNkJEXEPIz75hYAOIQAUK8Ad1mHcgJQT+HmcDOSM2hNIRI5+4IURsIgLPQUqXPfWZWE5k8kieNy+d18X7i7+O3y8wEywS7Ba8I8eR9VSJ8Jpoueiu2SazAnMr82DzTyPUI+6INX1Giz7i/u8c9Tv/LmdN4znNc+qOMxPnQNjPlGLG73AcfN9/++n39/OxFz4+e5qbLHV+Vlj/tOVnmC2FdGwBk/BqKjbH8smlM2JI7Cd+4rz4XD3k0FqNPqmQMblFUGqN8qIXw7lRYkjrIoyj4QfaZFzOGSmxsdx/DGSADMtgoUQrqYEM7h3tonO/U12lv627rAtJZ9Ka9e/i0vfkWKnUB9iwnaLrVyIvXZKKjbrS/pdhIUdmzv4yl0BLtG0BWm3Dv2kd42MSohKSpNWqKkdXN6WTKkGVEcJIasL3KTdcCRWJ8UIo2yUMgP3YBmzOIwBb0g1epERryHzxTWF7fFJsSnS66FFoyRF7Xz/7wNiQxAhtqNrvsfVSmKjfWhvhI14IKdRVB37m3tKuwq9Sd8dNG+qW2uIXn747Qq0+OCEULssvSvEqy6xQIY5irIMutWYGQBtL4CNuBKYImNGhF+KCddp6t8TEKHWVg6u7R3qo494xwKssJdrsQ4rxYrjbnBeekHdu/X8UlAnq+nOxWEOuxqjFDwfGQPX3kipuNPeKlsjBaQp2wrKZIDP5/Mn5wJRh4aJtahmAvQPESMIHlvOhQYbXTDArHHv8CR7SX/EUWLOjehnVWFJi8ITjBKgWxLfsoneWKzray8vV3IryXbs8FLsYcQjYEu/A5olkP1HkY70hvZa2M/jdv2cwiGI/g/0BrsHgvZZ2+nj3+/oIpcHO3s+DM/N6cM/D3m52cVgD7HhGFL6GaV16z975XKVxC23ywcNvTnG+8Ik07fz/ZHqwvfOboT3Nixfv3SeP07pzSBbdRZ8KMDQ3ZusePMpRyvcH+J2WRXK7M+dvWrxonzxY39zkJw9QGkC96QM9VK3jIsptOwevUtzOORCwG6L53Bawojd3e95U3dfdK7r5qseu02rrnjXLDiyvX9q99OVe0Yr8fwTIa6dftnQP1g8e6F8zsLXVbofVq1VFLjp7lZ2ny+bVIvEAt5zI44DP9XNRtKkRf3myn2vkhKQ4pAf2dP3eBeoe4s0v/D6IpPGSkp+mAWMNlmD23Z+5MkxkkPKI7YfXWa861ASEUgbnqYQ+EodTkZmazCxpqiY1KoWBgATot1d/CmhtY3evdUfN8Z3Zsn9LyGHPyd2YsMbSKdgc7zwnO0pdXk4+oSLDNG
Fh0hBNSFQI40BdXkGK4SdunKaGAGnbv21g18Y3GLX0jv9Mo95xo37w/S2gTtiOZWQL1Qy2JK55zx1YLfRc7b7Z77TPA7dX57pF4pAa0D15AaDxHwPsc7Budm4E8ya3Fkl3BOcXQvc33a675V123CCT9KDyM6esUvrsZNWWTXaKQ27aM5VqRjIcccLPS+EiPdzo1M2YbMGB5so54mi1qRh/IRO57fTd+NLEnFQRzn0OsEsY4HtGfeSCiNtjek0/DSr28ZV6nws+ftw7syCvMP9xFiPpQfJfSOflZ1eUGYL8lAFKtyDGOVulz5X2V+N13T/PfqvT6mtdu15rdU6ftbl/rsmK2c8Jt3Nyeuh5VetNv1KPLJnEpC3Lyi4uk7Zsrd5y+kTguUCm2pU0ZOQnFlrfqrjkfsZLdVB2/mgxJcFsU4naWeF14dRpdUqJt8z+LOlQXK1qt16BL+8FCg7hGTS2GvDOLCP5XJMNu6CS1zeAwL8Y5Hhh3YOGtULzokDkBPLcwSEbxJ1LJ7ee7u6qfdctr9/irb4UESpL7CBhcQNWUhKj3B/LyTaq2JVkpualF1nfz/M9dPys6oTCKyXfS+bkSCru3Q28Z43H4sP0IyeUg5PzTFsORvNoWEhLwAGbkwzj5TMAK3FE45iPb+EALJkRfumh+8radc5I/lN8o3q54HqjJQ2nqGD1pYtB/vu37N0YOUn0looabuqq78zR5+oz9aIaXCWMNFXRjxy5co53tJrEa0zZB0nb/lP2VjfIjRzkwzTL76gakOo7sjpzFhatrJwt+o56BvInIAdX6UwKz1VjqQcex5z9Sfku+DfRLCpwus96j7miszBOBVKYK51FYdcHWO5MwkC5cTx9VozPmvvJ8HFLjvmFy8QazqSB5WPuD3KTBiVVMM7ye6p6+D6MMPyum3YPj6icKfqeknjXwIG7nAScOzPfgmDc7I5nMEdghup96Fu9h5D1fusO42kiBUwPhWZYIJ1BSaqw8x2TZIQDnQV8nn/ZkMqxL7d3/NffGsfREA+TjwzhcLyKxNzue+PPrX45YP7VxsIwPLmi7fsD66D2cyB/5H5Y/5NLP3T+QMD+T3w4AK30xW3KjYdXihZzJ/rw38LHH/Qfkt6IYDsV30/iHVS0JjpamqZJi0xjYHO8314qRjNGGhmviY3z3UOqJilB+mP5neYHT73tSpiWJrLdY8fNpdZ41JbVeEKgJiAqUDaVurKcBD4VGhUSLJ0QgeGvdTqe8ZPjMJQNjwGzYUka2MIyGi8bFgnhqlBy7EcctvwpVNtK+4SroRSGh017Etdm0u2fJxSHZBh7M9dlYnUmhZ3ihVFr4fo1cCRThD01qRqzLvP6EV1x5EZvGSdZWNSnxibGxSflWfzPOH2sLULkQSMCQuZsMIh56+5DSETwceyOiJEnaqrpeFMTL45KT2SVE6FCQT6mYkT6UxQL3kuE0ImJiQstjCOI5BABsSRIWyMm2PELv4BMxL8Js/UIJUYg3sC34LFtcngG8TljfTzC+gukFTFGxR5MXK+wY1kpy7AT2ansfHYlu5nNY8smTJwwdYJdby2ZlZWwVqwNS3AetMLZ4HDyuQ78PjA88OvALwM/DvQPvBvoRGjArf/P/p7+B4jA4FmsWNuyYU1qfEqEDIMgdpuy/DIT7Ueu/lfEF1QhhFBPUGOzf+ULT2q/fv3yySxG8AER+Hj7twX/+QIoqpXncPtqIkwNZH1EJiG5WU6zCgr9gb5DFXQaRa1vCO1i7b5v0XiWrEw12SRfxTKzzHEO2gfJIRZCQifwEY0ZQigICscQBIJieHjCYvgQimgqyFdBkqbxqDKH38SLt0C+FNx5UGIVYy003wLLO0kIu1KWk0CrK+dB4ZwguLYJOTtLGS9V8TTXwgaco2qtPL8YjqsfuXLUpsjFXIrCJUfEE6mJFbx4Yi3gzRcBfKgAX7PQQggA) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: bold; - font-style: normal; - unicode-range: U+2190-21FF; - src: local('☺'), - 
url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAAUwAAsAAAAAB1AAAATmAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADYgiGyAcKgZgADQBNgIkAxgEBgWBdAcgG4sGAI7TpTZgEnN0/bdZ/WbmEV2iJA6zYsqmkhMnIk5BTAcihkUMkpy4sKwZOZW/86+rntyLICrXkQ0QR08+dWrm0gQ092BRC9DqXtSrFmhMLHCpwL2epzzWACpGhKqRXuEpp/dUeAUCAgeEEGVSsaH2AaPBuPtMK1N7Z4K8GeQtgSBv5eRtvBwusJBAXv0Y9B08uGa1Cq8P9LGwZ+MB/BzxAMI3FN8NuvJ/Wyh+GAJKQvxDYjftPnI66b6Uwkdeeru4ocHYGjOaaq4brORYs5kiqzHz43HWWGZoNlQf2H/8SMsd5Y5Fjq9N5ffv2QcdWSrnbSvN237wz3wHOHNnXr9OX1r74QSeH2ZgOI82/g8dsxCCYs+IPs2/AICQIT8I4IAAB1odPoJ/yf3kNfI7t4Mr5p5UvokvJnrtWBYaZvbgd6pOqUvqipGme1xmdViRfcQxYrfgMKuIct9ip1BD5VM/LmsVqEUx6zZVWvExKy6G4kfesCLcgU4ViqilYY8tJ/4on/VBDTt1y82q0EpdZpOrO7ZL6uqUNKyMWanyTVnL6ry441JomBkvyeUqPMNEuqw9Lp9SsJNM474Lh7CChhXZbQ673e7rmpmedsbMdTk7nWpmwTKqHG+Rf2sl+M6PPL69oOp0OjvnYl2zrrkZDVah5S4307BTVE44vpzkw0TU3plFld9ji85rxnvIW/ilSjJJZilGmu6d7VE7hh0jjhF8gJ2Jct9k3L8UryoYt9V9Nz6AZxx2dKEjenZ6esoZ4zTNdM+qWTomZicht5Wy6wrk/s1epEpWu7RgRRPBj7z8P7JbtZiFgRhFV+72YYEYnZSJmayKSrMzkjPWOTU9O62xOUYddgemYHHUXW4WwKLp0t0+GMCibrlZChYl61cc0c6e3ikpVjKbuk0a5UU5nSAvZ/LYhBtUTWhEDzt34SL7DD9DfcvMr+zLJ+nggG0wtnKwv1JjG7IODsY8w76ing/9W87i5ffkDJXDxzZlm5iMmbJM9Y+pbRY60S/ZuzrK4sC+bJw5jPrH2Gc444IOPXiuiRl/lajyciAfvVs+o3qCGZkHPck6/Iy5mf5x8yH8somOjVvGYp9acDw7YZmxTGnq8UuvdqJHCOivGrIOWgbVnPNwTI9tYGxA3T/hGnH5pWwuxC+fMP/G9M34GbqTLzIP8zyBxiMuquyaW/phXjvPWuYVrGTcp2tO/smKGfO+zGCdtfp5/T0B3rGxsTeXNgYGemZHJ8fGp54KXLe0LTwegE6qgACAP8RRI0l33p8GviAAAKytAXjcqFtsww2m1Bc3GSDKJQ15gfsI40B5DR/I3QaAnOGSp+BAMmChxbPhsfVj6138s6sI+GeJ31mASQAA7gQfCABinAFuoPlAOBcivCyASAsUKa3/tbWaWuqnejdBqM1hrcZr7S0MOvWfLy+8BgDwfceFS/zsno/da2urf/s5hG+AAA+czwr+wiqy+etcSGvLPYAAfJ9wU6ATLdkByYKglFGJEkagAwEA4AM5eCCCHwB0ABkQEKADcMCBBHgAMHcL7ooUYmwhhZ/zExnVadegWqUqTUTPg+iAffY71jDJpONEJdqJdKpU09er6i1DqVpNqjQz2EOUQO+iM9RQoyZQztPlgZahUGYP0qZkBv24RtWMDET77bGvVngAHWsLcAAAAAA=) format('woff2'); -} -@font-face{ - font-family: "dm"; - font-weight: bold; - font-style: normal; - unicode-range: U+F8FF-10FFFF; - src: local('☺'), - url(data:font/woff2;charset=utf-8;base64,d09GMk9UVE8AAAJ0AAsAAAAAA/QAAAIqAAEAAAAAAAAAAAAAAAAAAAAAAAAAAAAADYILGyAcKgZgAAQBNgIkAwQEBgWBdAcgGzADAC4D7IZjzymg62laLb5im9YqKYd4pp0IH5Q4wfM11t7f3RNPmCSGpIkh4c2yaCYkPItas2reuIZYSNQTM2Ss3ZtaSFrNsPSi0pFIpbFOplQSozQTgtx2D1DYR6JuhAypGLvREA2A7uApPgY1OYgeC8Ayvq0tfgCOy3/OjT9vz2+AA909pwF9/n14lic0TDDgLsCov3cD7HJP9eTYIyKzTeFwCA8+E4xCRLzVnb12vN/reyL9TgUEkfJgglcieJcPbf0Qt29BkrLv8J7zzxfL3bbhyD1wjGfBvFlGfC+BrvfzhBHx1jVXVjwp4PWnrNv+wl6+cBzQyXu6yQ23capTbj9uoPHOagFvx3+WIwca9UT0znIXQESUt2/589nB/3tmaqS0NLrz8cJ7wfRIj9sRh9955/JPstfOQqZk3y3czExOze66PSH71ADLTBIBnGTKJtW1sRzoGIAEAGBYnnxqGre7c9KDHAkko875Ih2j3eoVNMnzyi47CgTI/R/Y5O+BdBB/hz4QRzmYAVQZiQ3kMgAIIPD/xF1r5/lKD3MM+Fw1wOfx8Ya/07h5I9AoAAT0Q/wvR2PC/4+GwJWJmuVBIwSw0URCjAPogY6CoQcVigHUwNDQ4DoL6WWVtc7zaH+wHDXrjYnekaVEkWKlS6TJfCtX11KvsdHs+BudH+X2pDHteZVAhURkrVZjS6jKUF2dVSteBQb3BfvWuNnv6RUrUMSpBo7ENgAAAA==) format('woff2'); -} diff --git a/docs/md/assets/highlight.css b/docs/md/assets/highlight.css deleted file mode 100644 index e69de29..0000000 diff --git a/docs/md/assets/highlight.min.js b/docs/md/assets/highlight.min.js deleted file mode 100644 index f43ba9a..0000000 --- a/docs/md/assets/highlight.min.js +++ /dev/null @@ -1,1213 +0,0 @@ -/*! 
- Highlight.js v11.9.0 (git: f47103d4f1) - (c) 2006-2023 undefined and other contributors - License: BSD-3-Clause - */ - var hljs=function(){"use strict";function e(n){ - return n instanceof Map?n.clear=n.delete=n.set=()=>{ - throw Error("map is read-only")}:n instanceof Set&&(n.add=n.clear=n.delete=()=>{ - throw Error("set is read-only") - }),Object.freeze(n),Object.getOwnPropertyNames(n).forEach((t=>{ - const a=n[t],i=typeof a;"object"!==i&&"function"!==i||Object.isFrozen(a)||e(a) - })),n}class n{constructor(e){ - void 0===e.data&&(e.data={}),this.data=e.data,this.isMatchIgnored=!1} - ignoreMatch(){this.isMatchIgnored=!0}}function t(e){ - return e.replace(/&/g,"&").replace(//g,">").replace(/"/g,""").replace(/'/g,"'") - }function a(e,...n){const t=Object.create(null);for(const n in e)t[n]=e[n] - ;return n.forEach((e=>{for(const n in e)t[n]=e[n]})),t}const i=e=>!!e.scope - ;class r{constructor(e,n){ - this.buffer="",this.classPrefix=n.classPrefix,e.walk(this)}addText(e){ - this.buffer+=t(e)}openNode(e){if(!i(e))return;const n=((e,{prefix:n})=>{ - if(e.startsWith("language:"))return e.replace("language:","language-") - ;if(e.includes(".")){const t=e.split(".") - ;return[`${n}${t.shift()}`,...t.map(((e,n)=>`${e}${"_".repeat(n+1)}`))].join(" ") - }return`${n}${e}`})(e.scope,{prefix:this.classPrefix});this.span(n)} - closeNode(e){i(e)&&(this.buffer+="")}value(){return this.buffer}span(e){ - this.buffer+=``}}const s=(e={})=>{const n={children:[]} - ;return Object.assign(n,e),n};class o{constructor(){ - this.rootNode=s(),this.stack=[this.rootNode]}get top(){ - return this.stack[this.stack.length-1]}get root(){return this.rootNode}add(e){ - this.top.children.push(e)}openNode(e){const n=s({scope:e}) - ;this.add(n),this.stack.push(n)}closeNode(){ - if(this.stack.length>1)return this.stack.pop()}closeAllNodes(){ - for(;this.closeNode(););}toJSON(){return JSON.stringify(this.rootNode,null,4)} - walk(e){return this.constructor._walk(e,this.rootNode)}static _walk(e,n){ - return"string"==typeof n?e.addText(n):n.children&&(e.openNode(n), - n.children.forEach((n=>this._walk(e,n))),e.closeNode(n)),e}static _collapse(e){ - "string"!=typeof e&&e.children&&(e.children.every((e=>"string"==typeof e))?e.children=[e.children.join("")]:e.children.forEach((e=>{ - o._collapse(e)})))}}class l extends o{constructor(e){super(),this.options=e} - addText(e){""!==e&&this.add(e)}startScope(e){this.openNode(e)}endScope(){ - this.closeNode()}__addSublanguage(e,n){const t=e.root - ;n&&(t.scope="language:"+n),this.add(t)}toHTML(){ - return new r(this,this.options).value()}finalize(){ - return this.closeAllNodes(),!0}}function c(e){ - return e?"string"==typeof e?e:e.source:null}function d(e){return b("(?=",e,")")} - function g(e){return b("(?:",e,")*")}function u(e){return b("(?:",e,")?")} - function b(...e){return e.map((e=>c(e))).join("")}function m(...e){const n=(e=>{ - const n=e[e.length-1] - ;return"object"==typeof n&&n.constructor===Object?(e.splice(e.length-1,1),n):{} - })(e);return"("+(n.capture?"":"?:")+e.map((e=>c(e))).join("|")+")"} - function p(e){return RegExp(e.toString()+"|").exec("").length-1} - const _=/\[(?:[^\\\]]|\\.)*\]|\(\??|\\([1-9][0-9]*)|\\./ - ;function h(e,{joinWith:n}){let t=0;return e.map((e=>{t+=1;const n=t - ;let a=c(e),i="";for(;a.length>0;){const e=_.exec(a);if(!e){i+=a;break} - i+=a.substring(0,e.index), - a=a.substring(e.index+e[0].length),"\\"===e[0][0]&&e[1]?i+="\\"+(Number(e[1])+n):(i+=e[0], - "("===e[0]&&t++)}return i})).map((e=>`(${e})`)).join(n)} - const 
f="[a-zA-Z]\\w*",E="[a-zA-Z_]\\w*",y="\\b\\d+(\\.\\d+)?",N="(-?)(\\b0[xX][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?)",w="\\b(0b[01]+)",v={ - begin:"\\\\[\\s\\S]",relevance:0},O={scope:"string",begin:"'",end:"'", - illegal:"\\n",contains:[v]},k={scope:"string",begin:'"',end:'"',illegal:"\\n", - contains:[v]},x=(e,n,t={})=>{const i=a({scope:"comment",begin:e,end:n, - contains:[]},t);i.contains.push({scope:"doctag", - begin:"[ ]*(?=(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):)", - end:/(TODO|FIXME|NOTE|BUG|OPTIMIZE|HACK|XXX):/,excludeBegin:!0,relevance:0}) - ;const r=m("I","a","is","so","us","to","at","if","in","it","on",/[A-Za-z]+['](d|ve|re|ll|t|s|n)/,/[A-Za-z]+[-][a-z]+/,/[A-Za-z][a-z]{2,}/) - ;return i.contains.push({begin:b(/[ ]+/,"(",r,/[.]?[:]?([.][ ]|[ ])/,"){3}")}),i - },M=x("//","$"),S=x("/\\*","\\*/"),A=x("#","$");var C=Object.freeze({ - __proto__:null,APOS_STRING_MODE:O,BACKSLASH_ESCAPE:v,BINARY_NUMBER_MODE:{ - scope:"number",begin:w,relevance:0},BINARY_NUMBER_RE:w,COMMENT:x, - C_BLOCK_COMMENT_MODE:S,C_LINE_COMMENT_MODE:M,C_NUMBER_MODE:{scope:"number", - begin:N,relevance:0},C_NUMBER_RE:N,END_SAME_AS_BEGIN:e=>Object.assign(e,{ - "on:begin":(e,n)=>{n.data._beginMatch=e[1]},"on:end":(e,n)=>{ - n.data._beginMatch!==e[1]&&n.ignoreMatch()}}),HASH_COMMENT_MODE:A,IDENT_RE:f, - MATCH_NOTHING_RE:/\b\B/,METHOD_GUARD:{begin:"\\.\\s*"+E,relevance:0}, - NUMBER_MODE:{scope:"number",begin:y,relevance:0},NUMBER_RE:y, - PHRASAL_WORDS_MODE:{ - begin:/\b(a|an|the|are|I'm|isn't|don't|doesn't|won't|but|just|should|pretty|simply|enough|gonna|going|wtf|so|such|will|you|your|they|like|more)\b/ - },QUOTE_STRING_MODE:k,REGEXP_MODE:{scope:"regexp",begin:/\/(?=[^/\n]*\/)/, - end:/\/[gimuy]*/,contains:[v,{begin:/\[/,end:/\]/,relevance:0,contains:[v]}]}, - RE_STARTERS_RE:"!|!=|!==|%|%=|&|&&|&=|\\*|\\*=|\\+|\\+=|,|-|-=|/=|/|:|;|<<|<<=|<=|<|===|==|=|>>>=|>>=|>=|>>>|>>|>|\\?|\\[|\\{|\\(|\\^|\\^=|\\||\\|=|\\|\\||~", - SHEBANG:(e={})=>{const n=/^#![ ]*\// - ;return e.binary&&(e.begin=b(n,/.*\b/,e.binary,/\b.*/)),a({scope:"meta",begin:n, - end:/$/,relevance:0,"on:begin":(e,n)=>{0!==e.index&&n.ignoreMatch()}},e)}, - TITLE_MODE:{scope:"title",begin:f,relevance:0},UNDERSCORE_IDENT_RE:E, - UNDERSCORE_TITLE_MODE:{scope:"title",begin:E,relevance:0}});function T(e,n){ - "."===e.input[e.index-1]&&n.ignoreMatch()}function R(e,n){ - void 0!==e.className&&(e.scope=e.className,delete e.className)}function D(e,n){ - n&&e.beginKeywords&&(e.begin="\\b("+e.beginKeywords.split(" ").join("|")+")(?!\\.)(?=\\b|\\s)", - e.__beforeBegin=T,e.keywords=e.keywords||e.beginKeywords,delete e.beginKeywords, - void 0===e.relevance&&(e.relevance=0))}function I(e,n){ - Array.isArray(e.illegal)&&(e.illegal=m(...e.illegal))}function L(e,n){ - if(e.match){ - if(e.begin||e.end)throw Error("begin & end are not supported with match") - ;e.begin=e.match,delete e.match}}function B(e,n){ - void 0===e.relevance&&(e.relevance=1)}const $=(e,n)=>{if(!e.beforeMatch)return - ;if(e.starts)throw Error("beforeMatch cannot be used with starts") - ;const t=Object.assign({},e);Object.keys(e).forEach((n=>{delete e[n] - })),e.keywords=t.keywords,e.begin=b(t.beforeMatch,d(t.begin)),e.starts={ - relevance:0,contains:[Object.assign(t,{endsParent:!0})] - },e.relevance=0,delete t.beforeMatch - },z=["of","and","for","in","not","or","if","then","parent","list","value"],F="keyword" - ;function U(e,n,t=F){const a=Object.create(null) - ;return"string"==typeof e?i(t,e.split(" ")):Array.isArray(e)?i(t,e):Object.keys(e).forEach((t=>{ - 
Object.assign(a,U(e[t],n,t))})),a;function i(e,t){ - n&&(t=t.map((e=>e.toLowerCase()))),t.forEach((n=>{const t=n.split("|") - ;a[t[0]]=[e,j(t[0],t[1])]}))}}function j(e,n){ - return n?Number(n):(e=>z.includes(e.toLowerCase()))(e)?0:1}const P={},K=e=>{ - console.error(e)},H=(e,...n)=>{console.log("WARN: "+e,...n)},q=(e,n)=>{ - P[`${e}/${n}`]||(console.log(`Deprecated as of ${e}. ${n}`),P[`${e}/${n}`]=!0) - },G=Error();function Z(e,n,{key:t}){let a=0;const i=e[t],r={},s={} - ;for(let e=1;e<=n.length;e++)s[e+a]=i[e],r[e+a]=!0,a+=p(n[e-1]) - ;e[t]=s,e[t]._emit=r,e[t]._multi=!0}function W(e){(e=>{ - e.scope&&"object"==typeof e.scope&&null!==e.scope&&(e.beginScope=e.scope, - delete e.scope)})(e),"string"==typeof e.beginScope&&(e.beginScope={ - _wrap:e.beginScope}),"string"==typeof e.endScope&&(e.endScope={_wrap:e.endScope - }),(e=>{if(Array.isArray(e.begin)){ - if(e.skip||e.excludeBegin||e.returnBegin)throw K("skip, excludeBegin, returnBegin not compatible with beginScope: {}"), - G - ;if("object"!=typeof e.beginScope||null===e.beginScope)throw K("beginScope must be object"), - G;Z(e,e.begin,{key:"beginScope"}),e.begin=h(e.begin,{joinWith:""})}})(e),(e=>{ - if(Array.isArray(e.end)){ - if(e.skip||e.excludeEnd||e.returnEnd)throw K("skip, excludeEnd, returnEnd not compatible with endScope: {}"), - G - ;if("object"!=typeof e.endScope||null===e.endScope)throw K("endScope must be object"), - G;Z(e,e.end,{key:"endScope"}),e.end=h(e.end,{joinWith:""})}})(e)}function Q(e){ - function n(n,t){ - return RegExp(c(n),"m"+(e.case_insensitive?"i":"")+(e.unicodeRegex?"u":"")+(t?"g":"")) - }class t{constructor(){ - this.matchIndexes={},this.regexes=[],this.matchAt=1,this.position=0} - addRule(e,n){ - n.position=this.position++,this.matchIndexes[this.matchAt]=n,this.regexes.push([n,e]), - this.matchAt+=p(e)+1}compile(){0===this.regexes.length&&(this.exec=()=>null) - ;const e=this.regexes.map((e=>e[1]));this.matcherRe=n(h(e,{joinWith:"|" - }),!0),this.lastIndex=0}exec(e){this.matcherRe.lastIndex=this.lastIndex - ;const n=this.matcherRe.exec(e);if(!n)return null - ;const t=n.findIndex(((e,n)=>n>0&&void 0!==e)),a=this.matchIndexes[t] - ;return n.splice(0,t),Object.assign(n,a)}}class i{constructor(){ - this.rules=[],this.multiRegexes=[], - this.count=0,this.lastIndex=0,this.regexIndex=0}getMatcher(e){ - if(this.multiRegexes[e])return this.multiRegexes[e];const n=new t - ;return this.rules.slice(e).forEach((([e,t])=>n.addRule(e,t))), - n.compile(),this.multiRegexes[e]=n,n}resumingScanAtSamePosition(){ - return 0!==this.regexIndex}considerAll(){this.regexIndex=0}addRule(e,n){ - this.rules.push([e,n]),"begin"===n.type&&this.count++}exec(e){ - const n=this.getMatcher(this.regexIndex);n.lastIndex=this.lastIndex - ;let t=n.exec(e) - ;if(this.resumingScanAtSamePosition())if(t&&t.index===this.lastIndex);else{ - const n=this.getMatcher(0);n.lastIndex=this.lastIndex+1,t=n.exec(e)} - return t&&(this.regexIndex+=t.position+1, - this.regexIndex===this.count&&this.considerAll()),t}} - if(e.compilerExtensions||(e.compilerExtensions=[]), - e.contains&&e.contains.includes("self"))throw Error("ERR: contains `self` is not supported at the top-level of a language. 
See documentation.") - ;return e.classNameAliases=a(e.classNameAliases||{}),function t(r,s){const o=r - ;if(r.isCompiled)return o - ;[R,L,W,$].forEach((e=>e(r,s))),e.compilerExtensions.forEach((e=>e(r,s))), - r.__beforeBegin=null,[D,I,B].forEach((e=>e(r,s))),r.isCompiled=!0;let l=null - ;return"object"==typeof r.keywords&&r.keywords.$pattern&&(r.keywords=Object.assign({},r.keywords), - l=r.keywords.$pattern, - delete r.keywords.$pattern),l=l||/\w+/,r.keywords&&(r.keywords=U(r.keywords,e.case_insensitive)), - o.keywordPatternRe=n(l,!0), - s&&(r.begin||(r.begin=/\B|\b/),o.beginRe=n(o.begin),r.end||r.endsWithParent||(r.end=/\B|\b/), - r.end&&(o.endRe=n(o.end)), - o.terminatorEnd=c(o.end)||"",r.endsWithParent&&s.terminatorEnd&&(o.terminatorEnd+=(r.end?"|":"")+s.terminatorEnd)), - r.illegal&&(o.illegalRe=n(r.illegal)), - r.contains||(r.contains=[]),r.contains=[].concat(...r.contains.map((e=>(e=>(e.variants&&!e.cachedVariants&&(e.cachedVariants=e.variants.map((n=>a(e,{ - variants:null},n)))),e.cachedVariants?e.cachedVariants:X(e)?a(e,{ - starts:e.starts?a(e.starts):null - }):Object.isFrozen(e)?a(e):e))("self"===e?r:e)))),r.contains.forEach((e=>{t(e,o) - })),r.starts&&t(r.starts,s),o.matcher=(e=>{const n=new i - ;return e.contains.forEach((e=>n.addRule(e.begin,{rule:e,type:"begin" - }))),e.terminatorEnd&&n.addRule(e.terminatorEnd,{type:"end" - }),e.illegal&&n.addRule(e.illegal,{type:"illegal"}),n})(o),o}(e)}function X(e){ - return!!e&&(e.endsWithParent||X(e.starts))}class V extends Error{ - constructor(e,n){super(e),this.name="HTMLInjectionError",this.html=n}} - const J=t,Y=a,ee=Symbol("nomatch"),ne=t=>{ - const a=Object.create(null),i=Object.create(null),r=[];let s=!0 - ;const o="Could not find the language '{}', did you forget to load/include a language module?",c={ - disableAutodetect:!0,name:"Plain text",contains:[]};let p={ - ignoreUnescapedHTML:!1,throwUnescapedHTML:!1,noHighlightRe:/^(no-?highlight)$/i, - languageDetectRe:/\blang(?:uage)?-([\w-]+)\b/i,classPrefix:"hljs-", - cssSelector:"pre code",languages:null,__emitter:l};function _(e){ - return p.noHighlightRe.test(e)}function h(e,n,t){let a="",i="" - ;"object"==typeof n?(a=e, - t=n.ignoreIllegals,i=n.language):(q("10.7.0","highlight(lang, code, ...args) has been deprecated."), - q("10.7.0","Please use highlight(code, options) instead.\nhttps://github.com/highlightjs/highlight.js/issues/2277"), - i=e,a=n),void 0===t&&(t=!0);const r={code:a,language:i};x("before:highlight",r) - ;const s=r.result?r.result:f(r.language,r.code,t) - ;return s.code=r.code,x("after:highlight",s),s}function f(e,t,i,r){ - const l=Object.create(null);function c(){if(!x.keywords)return void S.addText(A) - ;let e=0;x.keywordPatternRe.lastIndex=0;let n=x.keywordPatternRe.exec(A),t="" - ;for(;n;){t+=A.substring(e,n.index) - ;const i=w.case_insensitive?n[0].toLowerCase():n[0],r=(a=i,x.keywords[a]);if(r){ - const[e,a]=r - ;if(S.addText(t),t="",l[i]=(l[i]||0)+1,l[i]<=7&&(C+=a),e.startsWith("_"))t+=n[0];else{ - const t=w.classNameAliases[e]||e;g(n[0],t)}}else t+=n[0] - ;e=x.keywordPatternRe.lastIndex,n=x.keywordPatternRe.exec(A)}var a - ;t+=A.substring(e),S.addText(t)}function d(){null!=x.subLanguage?(()=>{ - if(""===A)return;let e=null;if("string"==typeof x.subLanguage){ - if(!a[x.subLanguage])return void S.addText(A) - ;e=f(x.subLanguage,A,!0,M[x.subLanguage]),M[x.subLanguage]=e._top - }else e=E(A,x.subLanguage.length?x.subLanguage:null) - ;x.relevance>0&&(C+=e.relevance),S.__addSublanguage(e._emitter,e.language) - })():c(),A=""}function g(e,n){ - 
""!==e&&(S.startScope(n),S.addText(e),S.endScope())}function u(e,n){let t=1 - ;const a=n.length-1;for(;t<=a;){if(!e._emit[t]){t++;continue} - const a=w.classNameAliases[e[t]]||e[t],i=n[t];a?g(i,a):(A=i,c(),A=""),t++}} - function b(e,n){ - return e.scope&&"string"==typeof e.scope&&S.openNode(w.classNameAliases[e.scope]||e.scope), - e.beginScope&&(e.beginScope._wrap?(g(A,w.classNameAliases[e.beginScope._wrap]||e.beginScope._wrap), - A=""):e.beginScope._multi&&(u(e.beginScope,n),A="")),x=Object.create(e,{parent:{ - value:x}}),x}function m(e,t,a){let i=((e,n)=>{const t=e&&e.exec(n) - ;return t&&0===t.index})(e.endRe,a);if(i){if(e["on:end"]){const a=new n(e) - ;e["on:end"](t,a),a.isMatchIgnored&&(i=!1)}if(i){ - for(;e.endsParent&&e.parent;)e=e.parent;return e}} - if(e.endsWithParent)return m(e.parent,t,a)}function _(e){ - return 0===x.matcher.regexIndex?(A+=e[0],1):(D=!0,0)}function h(e){ - const n=e[0],a=t.substring(e.index),i=m(x,e,a);if(!i)return ee;const r=x - ;x.endScope&&x.endScope._wrap?(d(), - g(n,x.endScope._wrap)):x.endScope&&x.endScope._multi?(d(), - u(x.endScope,e)):r.skip?A+=n:(r.returnEnd||r.excludeEnd||(A+=n), - d(),r.excludeEnd&&(A=n));do{ - x.scope&&S.closeNode(),x.skip||x.subLanguage||(C+=x.relevance),x=x.parent - }while(x!==i.parent);return i.starts&&b(i.starts,e),r.returnEnd?0:n.length} - let y={};function N(a,r){const o=r&&r[0];if(A+=a,null==o)return d(),0 - ;if("begin"===y.type&&"end"===r.type&&y.index===r.index&&""===o){ - if(A+=t.slice(r.index,r.index+1),!s){const n=Error(`0 width match regex (${e})`) - ;throw n.languageName=e,n.badRule=y.rule,n}return 1} - if(y=r,"begin"===r.type)return(e=>{ - const t=e[0],a=e.rule,i=new n(a),r=[a.__beforeBegin,a["on:begin"]] - ;for(const n of r)if(n&&(n(e,i),i.isMatchIgnored))return _(t) - ;return a.skip?A+=t:(a.excludeBegin&&(A+=t), - d(),a.returnBegin||a.excludeBegin||(A=t)),b(a,e),a.returnBegin?0:t.length})(r) - ;if("illegal"===r.type&&!i){ - const e=Error('Illegal lexeme "'+o+'" for mode "'+(x.scope||"")+'"') - ;throw e.mode=x,e}if("end"===r.type){const e=h(r);if(e!==ee)return e} - if("illegal"===r.type&&""===o)return 1 - ;if(R>1e5&&R>3*r.index)throw Error("potential infinite loop, way more iterations than matches") - ;return A+=o,o.length}const w=v(e) - ;if(!w)throw K(o.replace("{}",e)),Error('Unknown language: "'+e+'"') - ;const O=Q(w);let k="",x=r||O;const M={},S=new p.__emitter(p);(()=>{const e=[] - ;for(let n=x;n!==w;n=n.parent)n.scope&&e.unshift(n.scope) - ;e.forEach((e=>S.openNode(e)))})();let A="",C=0,T=0,R=0,D=!1;try{ - if(w.__emitTokens)w.__emitTokens(t,S);else{for(x.matcher.considerAll();;){ - R++,D?D=!1:x.matcher.considerAll(),x.matcher.lastIndex=T - ;const e=x.matcher.exec(t);if(!e)break;const n=N(t.substring(T,e.index),e) - ;T=e.index+n}N(t.substring(T))}return S.finalize(),k=S.toHTML(),{language:e, - value:k,relevance:C,illegal:!1,_emitter:S,_top:x}}catch(n){ - if(n.message&&n.message.includes("Illegal"))return{language:e,value:J(t), - illegal:!0,relevance:0,_illegalBy:{message:n.message,index:T, - context:t.slice(T-100,T+100),mode:n.mode,resultSoFar:k},_emitter:S};if(s)return{ - language:e,value:J(t),illegal:!1,relevance:0,errorRaised:n,_emitter:S,_top:x} - ;throw n}}function E(e,n){n=n||p.languages||Object.keys(a);const t=(e=>{ - const n={value:J(e),illegal:!1,relevance:0,_top:c,_emitter:new p.__emitter(p)} - ;return n._emitter.addText(e),n})(e),i=n.filter(v).filter(k).map((n=>f(n,e,!1))) - ;i.unshift(t);const r=i.sort(((e,n)=>{ - if(e.relevance!==n.relevance)return n.relevance-e.relevance - 
;if(e.language&&n.language){if(v(e.language).supersetOf===n.language)return 1 - ;if(v(n.language).supersetOf===e.language)return-1}return 0})),[s,o]=r,l=s - ;return l.secondBest=o,l}function y(e){let n=null;const t=(e=>{ - let n=e.className+" ";n+=e.parentNode?e.parentNode.className:"" - ;const t=p.languageDetectRe.exec(n);if(t){const n=v(t[1]) - ;return n||(H(o.replace("{}",t[1])), - H("Falling back to no-highlight mode for this block.",e)),n?t[1]:"no-highlight"} - return n.split(/\s+/).find((e=>_(e)||v(e)))})(e);if(_(t))return - ;if(x("before:highlightElement",{el:e,language:t - }),e.dataset.highlighted)return void console.log("Element previously highlighted. To highlight again, first unset `dataset.highlighted`.",e) - ;if(e.children.length>0&&(p.ignoreUnescapedHTML||(console.warn("One of your code blocks includes unescaped HTML. This is a potentially serious security risk."), - console.warn("https://github.com/highlightjs/highlight.js/wiki/security"), - console.warn("The element with unescaped HTML:"), - console.warn(e)),p.throwUnescapedHTML))throw new V("One of your code blocks includes unescaped HTML.",e.innerHTML) - ;n=e;const a=n.textContent,r=t?h(a,{language:t,ignoreIllegals:!0}):E(a) - ;e.innerHTML=r.value,e.dataset.highlighted="yes",((e,n,t)=>{const a=n&&i[n]||t - ;e.classList.add("hljs"),e.classList.add("language-"+a) - })(e,t,r.language),e.result={language:r.language,re:r.relevance, - relevance:r.relevance},r.secondBest&&(e.secondBest={ - language:r.secondBest.language,relevance:r.secondBest.relevance - }),x("after:highlightElement",{el:e,result:r,text:a})}let N=!1;function w(){ - "loading"!==document.readyState?document.querySelectorAll(p.cssSelector).forEach(y):N=!0 - }function v(e){return e=(e||"").toLowerCase(),a[e]||a[i[e]]} - function O(e,{languageName:n}){"string"==typeof e&&(e=[e]),e.forEach((e=>{ - i[e.toLowerCase()]=n}))}function k(e){const n=v(e) - ;return n&&!n.disableAutodetect}function x(e,n){const t=e;r.forEach((e=>{ - e[t]&&e[t](n)}))} - "undefined"!=typeof window&&window.addEventListener&&window.addEventListener("DOMContentLoaded",(()=>{ - N&&w()}),!1),Object.assign(t,{highlight:h,highlightAuto:E,highlightAll:w, - highlightElement:y, - highlightBlock:e=>(q("10.7.0","highlightBlock will be removed entirely in v12.0"), - q("10.7.0","Please use highlightElement now."),y(e)),configure:e=>{p=Y(p,e)}, - initHighlighting:()=>{ - w(),q("10.6.0","initHighlighting() deprecated. Use highlightAll() now.")}, - initHighlightingOnLoad:()=>{ - w(),q("10.6.0","initHighlightingOnLoad() deprecated. 
Use highlightAll() now.") - },registerLanguage:(e,n)=>{let i=null;try{i=n(t)}catch(n){ - if(K("Language definition for '{}' could not be registered.".replace("{}",e)), - !s)throw n;K(n),i=c} - i.name||(i.name=e),a[e]=i,i.rawDefinition=n.bind(null,t),i.aliases&&O(i.aliases,{ - languageName:e})},unregisterLanguage:e=>{delete a[e] - ;for(const n of Object.keys(i))i[n]===e&&delete i[n]}, - listLanguages:()=>Object.keys(a),getLanguage:v,registerAliases:O, - autoDetection:k,inherit:Y,addPlugin:e=>{(e=>{ - e["before:highlightBlock"]&&!e["before:highlightElement"]&&(e["before:highlightElement"]=n=>{ - e["before:highlightBlock"](Object.assign({block:n.el},n)) - }),e["after:highlightBlock"]&&!e["after:highlightElement"]&&(e["after:highlightElement"]=n=>{ - e["after:highlightBlock"](Object.assign({block:n.el},n))})})(e),r.push(e)}, - removePlugin:e=>{const n=r.indexOf(e);-1!==n&&r.splice(n,1)}}),t.debugMode=()=>{ - s=!1},t.safeMode=()=>{s=!0},t.versionString="11.9.0",t.regex={concat:b, - lookahead:d,either:m,optional:u,anyNumberOfTimes:g} - ;for(const n in C)"object"==typeof C[n]&&e(C[n]);return Object.assign(t,C),t - },te=ne({});te.newInstance=()=>ne({});var ae=te;const ie=e=>({IMPORTANT:{ - scope:"meta",begin:"!important"},BLOCK_COMMENT:e.C_BLOCK_COMMENT_MODE,HEXCOLOR:{ - scope:"number",begin:/#(([0-9a-fA-F]{3,4})|(([0-9a-fA-F]{2}){3,4}))\b/}, - FUNCTION_DISPATCH:{className:"built_in",begin:/[\w-]+(?=\()/}, - ATTRIBUTE_SELECTOR_MODE:{scope:"selector-attr",begin:/\[/,end:/\]/,illegal:"$", - contains:[e.APOS_STRING_MODE,e.QUOTE_STRING_MODE]},CSS_NUMBER_MODE:{ - scope:"number", - begin:e.NUMBER_RE+"(%|em|ex|ch|rem|vw|vh|vmin|vmax|cm|mm|in|pt|pc|px|deg|grad|rad|turn|s|ms|Hz|kHz|dpi|dpcm|dppx)?", - relevance:0},CSS_VARIABLE:{className:"attr",begin:/--[A-Za-z_][A-Za-z0-9_-]*/} - 
}),re=["a","abbr","address","article","aside","audio","b","blockquote","body","button","canvas","caption","cite","code","dd","del","details","dfn","div","dl","dt","em","fieldset","figcaption","figure","footer","form","h1","h2","h3","h4","h5","h6","header","hgroup","html","i","iframe","img","input","ins","kbd","label","legend","li","main","mark","menu","nav","object","ol","p","q","quote","samp","section","span","strong","summary","sup","table","tbody","td","textarea","tfoot","th","thead","time","tr","ul","var","video"],se=["any-hover","any-pointer","aspect-ratio","color","color-gamut","color-index","device-aspect-ratio","device-height","device-width","display-mode","forced-colors","grid","height","hover","inverted-colors","monochrome","orientation","overflow-block","overflow-inline","pointer","prefers-color-scheme","prefers-contrast","prefers-reduced-motion","prefers-reduced-transparency","resolution","scan","scripting","update","width","min-width","max-width","min-height","max-height"],oe=["active","any-link","blank","checked","current","default","defined","dir","disabled","drop","empty","enabled","first","first-child","first-of-type","fullscreen","future","focus","focus-visible","focus-within","has","host","host-context","hover","indeterminate","in-range","invalid","is","lang","last-child","last-of-type","left","link","local-link","not","nth-child","nth-col","nth-last-child","nth-last-col","nth-last-of-type","nth-of-type","only-child","only-of-type","optional","out-of-range","past","placeholder-shown","read-only","read-write","required","right","root","scope","target","target-within","user-invalid","valid","visited","where"],le=["after","backdrop","before","cue","cue-region","first-letter","first-line","grammar-error","marker","part","placeholder","selection","slotted","spelling-error"],ce=["align-content","align-items","align-self","all","animation","animation-delay","animation-direction","animation-duration","animation-fill-mode","animation-iteration-count","animation-name","animation-play-state","animation-timing-function","backface-visibility","background","background-attachment","background-blend-mode","background-clip","background-color","background-image","background-origin","background-position","background-repeat","background-size","block-size","border","border-block","border-block-color","border-block-end","border-block-end-color","border-block-end-style","border-block-end-width","border-block-start","border-block-start-color","border-block-start-style","border-block-start-width","border-block-style","border-block-width","border-bottom","border-bottom-color","border-bottom-left-radius","border-bottom-right-radius","border-bottom-style","border-bottom-width","border-collapse","border-color","border-image","border-image-outset","border-image-repeat","border-image-slice","border-image-source","border-image-width","border-inline","border-inline-color","border-inline-end","border-inline-end-color","border-inline-end-style","border-inline-end-width","border-inline-start","border-inline-start-color","border-inline-start-style","border-inline-start-width","border-inline-style","border-inline-width","border-left","border-left-color","border-left-style","border-left-width","border-radius","border-right","border-right-color","border-right-style","border-right-width","border-spacing","border-style","border-top","border-top-color","border-top-left-radius","border-top-right-radius","border-top-style","border-top-width","border-width","bottom","box-decoration-break","box-shadow","box-sizing","b
reak-after","break-before","break-inside","caption-side","caret-color","clear","clip","clip-path","clip-rule","color","column-count","column-fill","column-gap","column-rule","column-rule-color","column-rule-style","column-rule-width","column-span","column-width","columns","contain","content","content-visibility","counter-increment","counter-reset","cue","cue-after","cue-before","cursor","direction","display","empty-cells","filter","flex","flex-basis","flex-direction","flex-flow","flex-grow","flex-shrink","flex-wrap","float","flow","font","font-display","font-family","font-feature-settings","font-kerning","font-language-override","font-size","font-size-adjust","font-smoothing","font-stretch","font-style","font-synthesis","font-variant","font-variant-caps","font-variant-east-asian","font-variant-ligatures","font-variant-numeric","font-variant-position","font-variation-settings","font-weight","gap","glyph-orientation-vertical","grid","grid-area","grid-auto-columns","grid-auto-flow","grid-auto-rows","grid-column","grid-column-end","grid-column-start","grid-gap","grid-row","grid-row-end","grid-row-start","grid-template","grid-template-areas","grid-template-columns","grid-template-rows","hanging-punctuation","height","hyphens","icon","image-orientation","image-rendering","image-resolution","ime-mode","inline-size","isolation","justify-content","left","letter-spacing","line-break","line-height","list-style","list-style-image","list-style-position","list-style-type","margin","margin-block","margin-block-end","margin-block-start","margin-bottom","margin-inline","margin-inline-end","margin-inline-start","margin-left","margin-right","margin-top","marks","mask","mask-border","mask-border-mode","mask-border-outset","mask-border-repeat","mask-border-slice","mask-border-source","mask-border-width","mask-clip","mask-composite","mask-image","mask-mode","mask-origin","mask-position","mask-repeat","mask-size","mask-type","max-block-size","max-height","max-inline-size","max-width","min-block-size","min-height","min-inline-size","min-width","mix-blend-mode","nav-down","nav-index","nav-left","nav-right","nav-up","none","normal","object-fit","object-position","opacity","order","orphans","outline","outline-color","outline-offset","outline-style","outline-width","overflow","overflow-wrap","overflow-x","overflow-y","padding","padding-block","padding-block-end","padding-block-start","padding-bottom","padding-inline","padding-inline-end","padding-inline-start","padding-left","padding-right","padding-top","page-break-after","page-break-before","page-break-inside","pause","pause-after","pause-before","perspective","perspective-origin","pointer-events","position","quotes","resize","rest","rest-after","rest-before","right","row-gap","scroll-margin","scroll-margin-block","scroll-margin-block-end","scroll-margin-block-start","scroll-margin-bottom","scroll-margin-inline","scroll-margin-inline-end","scroll-margin-inline-start","scroll-margin-left","scroll-margin-right","scroll-margin-top","scroll-padding","scroll-padding-block","scroll-padding-block-end","scroll-padding-block-start","scroll-padding-bottom","scroll-padding-inline","scroll-padding-inline-end","scroll-padding-inline-start","scroll-padding-left","scroll-padding-right","scroll-padding-top","scroll-snap-align","scroll-snap-stop","scroll-snap-type","scrollbar-color","scrollbar-gutter","scrollbar-width","shape-image-threshold","shape-margin","shape-outside","speak","speak-as","src","tab-size","table-layout","text-align","text-align-all","text-align-last","text-combin
e-upright","text-decoration","text-decoration-color","text-decoration-line","text-decoration-style","text-emphasis","text-emphasis-color","text-emphasis-position","text-emphasis-style","text-indent","text-justify","text-orientation","text-overflow","text-rendering","text-shadow","text-transform","text-underline-position","top","transform","transform-box","transform-origin","transform-style","transition","transition-delay","transition-duration","transition-property","transition-timing-function","unicode-bidi","vertical-align","visibility","voice-balance","voice-duration","voice-family","voice-pitch","voice-range","voice-rate","voice-stress","voice-volume","white-space","widows","width","will-change","word-break","word-spacing","word-wrap","writing-mode","z-index"].reverse(),de=oe.concat(le) - ;var ge="[0-9](_*[0-9])*",ue=`\\.(${ge})`,be="[0-9a-fA-F](_*[0-9a-fA-F])*",me={ - className:"number",variants:[{ - begin:`(\\b(${ge})((${ue})|\\.)?|(${ue}))[eE][+-]?(${ge})[fFdD]?\\b`},{ - begin:`\\b(${ge})((${ue})[fFdD]?\\b|\\.([fFdD]\\b)?)`},{ - begin:`(${ue})[fFdD]?\\b`},{begin:`\\b(${ge})[fFdD]\\b`},{ - begin:`\\b0[xX]((${be})\\.?|(${be})?\\.(${be}))[pP][+-]?(${ge})[fFdD]?\\b`},{ - begin:"\\b(0|[1-9](_*[0-9])*)[lL]?\\b"},{begin:`\\b0[xX](${be})[lL]?\\b`},{ - begin:"\\b0(_*[0-7])*[lL]?\\b"},{begin:"\\b0[bB][01](_*[01])*[lL]?\\b"}], - relevance:0};function pe(e,n,t){return-1===t?"":e.replace(n,(a=>pe(e,n,t-1)))} - const _e="[A-Za-z$_][0-9A-Za-z$_]*",he=["as","in","of","if","for","while","finally","var","new","function","do","return","void","else","break","catch","instanceof","with","throw","case","default","try","switch","continue","typeof","delete","let","yield","const","class","debugger","async","await","static","import","from","export","extends"],fe=["true","false","null","undefined","NaN","Infinity"],Ee=["Object","Function","Boolean","Symbol","Math","Date","Number","BigInt","String","RegExp","Array","Float32Array","Float64Array","Int8Array","Uint8Array","Uint8ClampedArray","Int16Array","Int32Array","Uint16Array","Uint32Array","BigInt64Array","BigUint64Array","Set","Map","WeakSet","WeakMap","ArrayBuffer","SharedArrayBuffer","Atomics","DataView","JSON","Promise","Generator","GeneratorFunction","AsyncFunction","Reflect","Proxy","Intl","WebAssembly"],ye=["Error","EvalError","InternalError","RangeError","ReferenceError","SyntaxError","TypeError","URIError"],Ne=["setInterval","setTimeout","clearInterval","clearTimeout","require","exports","eval","isFinite","isNaN","parseFloat","parseInt","decodeURI","decodeURIComponent","encodeURI","encodeURIComponent","escape","unescape"],we=["arguments","this","super","console","window","document","localStorage","sessionStorage","module","global"],ve=[].concat(Ne,Ee,ye) - ;function Oe(e){const n=e.regex,t=_e,a={begin:/<[A-Za-z0-9\\._:-]+/, - end:/\/[A-Za-z0-9\\._:-]+>|\/>/,isTrulyOpeningTag:(e,n)=>{ - const t=e[0].length+e.index,a=e.input[t] - ;if("<"===a||","===a)return void n.ignoreMatch();let i - ;">"===a&&(((e,{after:n})=>{const t="",M={ - match:[/const|var|let/,/\s+/,t,/\s*/,/=\s*/,/(async\s*)?/,n.lookahead(x)], - keywords:"async",className:{1:"keyword",3:"title.function"},contains:[f]} - ;return{name:"JavaScript",aliases:["js","jsx","mjs","cjs"],keywords:i,exports:{ - PARAMS_CONTAINS:h,CLASS_REFERENCE:y},illegal:/#(?![$_A-z])/, - contains:[e.SHEBANG({label:"shebang",binary:"node",relevance:5}),{ - label:"use_strict",className:"meta",relevance:10, - begin:/^\s*['"]use (strict|asm)['"]/ - },e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,d,g,u,b,m,{match:/\$\d+/},l,y,{ - 
className:"attr",begin:t+n.lookahead(":"),relevance:0},M,{ - begin:"("+e.RE_STARTERS_RE+"|\\b(case|return|throw)\\b)\\s*", - keywords:"return throw case",relevance:0,contains:[m,e.REGEXP_MODE,{ - className:"function",begin:x,returnBegin:!0,end:"\\s*=>",contains:[{ - className:"params",variants:[{begin:e.UNDERSCORE_IDENT_RE,relevance:0},{ - className:null,begin:/\(\s*\)/,skip:!0},{begin:/\(/,end:/\)/,excludeBegin:!0, - excludeEnd:!0,keywords:i,contains:h}]}]},{begin:/,/,relevance:0},{match:/\s+/, - relevance:0},{variants:[{begin:"<>",end:""},{ - match:/<[A-Za-z0-9\\._:-]+\s*\/>/},{begin:a.begin, - "on:begin":a.isTrulyOpeningTag,end:a.end}],subLanguage:"xml",contains:[{ - begin:a.begin,end:a.end,skip:!0,contains:["self"]}]}]},N,{ - beginKeywords:"while if switch catch for"},{ - begin:"\\b(?!function)"+e.UNDERSCORE_IDENT_RE+"\\([^()]*(\\([^()]*(\\([^()]*\\)[^()]*)*\\)[^()]*)*\\)\\s*\\{", - returnBegin:!0,label:"func.def",contains:[f,e.inherit(e.TITLE_MODE,{begin:t, - className:"title.function"})]},{match:/\.\.\./,relevance:0},O,{match:"\\$"+t, - relevance:0},{match:[/\bconstructor(?=\s*\()/],className:{1:"title.function"}, - contains:[f]},w,{relevance:0,match:/\b[A-Z][A-Z_0-9]+\b/, - className:"variable.constant"},E,k,{match:/\$[(.]/}]}} - const ke=e=>b(/\b/,e,/\w$/.test(e)?/\b/:/\B/),xe=["Protocol","Type"].map(ke),Me=["init","self"].map(ke),Se=["Any","Self"],Ae=["actor","any","associatedtype","async","await",/as\?/,/as!/,"as","borrowing","break","case","catch","class","consume","consuming","continue","convenience","copy","default","defer","deinit","didSet","distributed","do","dynamic","each","else","enum","extension","fallthrough",/fileprivate\(set\)/,"fileprivate","final","for","func","get","guard","if","import","indirect","infix",/init\?/,/init!/,"inout",/internal\(set\)/,"internal","in","is","isolated","nonisolated","lazy","let","macro","mutating","nonmutating",/open\(set\)/,"open","operator","optional","override","postfix","precedencegroup","prefix",/private\(set\)/,"private","protocol",/public\(set\)/,"public","repeat","required","rethrows","return","set","some","static","struct","subscript","super","switch","throws","throw",/try\?/,/try!/,"try","typealias",/unowned\(safe\)/,/unowned\(unsafe\)/,"unowned","var","weak","where","while","willSet"],Ce=["false","nil","true"],Te=["assignment","associativity","higherThan","left","lowerThan","none","right"],Re=["#colorLiteral","#column","#dsohandle","#else","#elseif","#endif","#error","#file","#fileID","#fileLiteral","#filePath","#function","#if","#imageLiteral","#keyPath","#line","#selector","#sourceLocation","#warning"],De=["abs","all","any","assert","assertionFailure","debugPrint","dump","fatalError","getVaList","isKnownUniquelyReferenced","max","min","numericCast","pointwiseMax","pointwiseMin","precondition","preconditionFailure","print","readLine","repeatElement","sequence","stride","swap","swift_unboxFromSwiftValueWithType","transcode","type","unsafeBitCast","unsafeDowncast","withExtendedLifetime","withUnsafeMutablePointer","withUnsafePointer","withVaList","withoutActuallyEscaping","zip"],Ie=m(/[/=\-+!*%<>&|^~?]/,/[\u00A1-\u00A7]/,/[\u00A9\u00AB]/,/[\u00AC\u00AE]/,/[\u00B0\u00B1]/,/[\u00B6\u00BB\u00BF\u00D7\u00F7]/,/[\u2016-\u2017]/,/[\u2020-\u2027]/,/[\u2030-\u203E]/,/[\u2041-\u2053]/,/[\u2055-\u205E]/,/[\u2190-\u23FF]/,/[\u2500-\u2775]/,/[\u2794-\u2BFF]/,/[\u2E00-\u2E7F]/,/[\u3001-\u3003]/,/[\u3008-\u3020]/,/[\u3030]/),Le=m(Ie,/[\u0300-\u036F]/,/[\u1DC0-\u1DFF]/,/[\u20D0-\u20FF]/,/[\uFE00-\uFE0F]/,/[\uFE20-\uFE2F]/),Be=b(Ie,Le,"*"),$e=m(/
[a-zA-Z_]/,/[\u00A8\u00AA\u00AD\u00AF\u00B2-\u00B5\u00B7-\u00BA]/,/[\u00BC-\u00BE\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u00FF]/,/[\u0100-\u02FF\u0370-\u167F\u1681-\u180D\u180F-\u1DBF]/,/[\u1E00-\u1FFF]/,/[\u200B-\u200D\u202A-\u202E\u203F-\u2040\u2054\u2060-\u206F]/,/[\u2070-\u20CF\u2100-\u218F\u2460-\u24FF\u2776-\u2793]/,/[\u2C00-\u2DFF\u2E80-\u2FFF]/,/[\u3004-\u3007\u3021-\u302F\u3031-\u303F\u3040-\uD7FF]/,/[\uF900-\uFD3D\uFD40-\uFDCF\uFDF0-\uFE1F\uFE30-\uFE44]/,/[\uFE47-\uFEFE\uFF00-\uFFFD]/),ze=m($e,/\d/,/[\u0300-\u036F\u1DC0-\u1DFF\u20D0-\u20FF\uFE20-\uFE2F]/),Fe=b($e,ze,"*"),Ue=b(/[A-Z]/,ze,"*"),je=["attached","autoclosure",b(/convention\(/,m("swift","block","c"),/\)/),"discardableResult","dynamicCallable","dynamicMemberLookup","escaping","freestanding","frozen","GKInspectable","IBAction","IBDesignable","IBInspectable","IBOutlet","IBSegueAction","inlinable","main","nonobjc","NSApplicationMain","NSCopying","NSManaged",b(/objc\(/,Fe,/\)/),"objc","objcMembers","propertyWrapper","requires_stored_property_inits","resultBuilder","Sendable","testable","UIApplicationMain","unchecked","unknown","usableFromInline","warn_unqualified_access"],Pe=["iOS","iOSApplicationExtension","macOS","macOSApplicationExtension","macCatalyst","macCatalystApplicationExtension","watchOS","watchOSApplicationExtension","tvOS","tvOSApplicationExtension","swift"] - ;var Ke=Object.freeze({__proto__:null,grmr_bash:e=>{const n=e.regex,t={},a={ - begin:/\$\{/,end:/\}/,contains:["self",{begin:/:-/,contains:[t]}]} - ;Object.assign(t,{className:"variable",variants:[{ - begin:n.concat(/\$[\w\d#@][\w\d_]*/,"(?![\\w\\d])(?![$])")},a]});const i={ - className:"subst",begin:/\$\(/,end:/\)/,contains:[e.BACKSLASH_ESCAPE]},r={ - begin:/<<-?\s*(?=\w+)/,starts:{contains:[e.END_SAME_AS_BEGIN({begin:/(\w+)/, - end:/(\w+)/,className:"string"})]}},s={className:"string",begin:/"/,end:/"/, - contains:[e.BACKSLASH_ESCAPE,t,i]};i.contains.push(s);const o={begin:/\$?\(\(/, - end:/\)\)/,contains:[{begin:/\d+#[0-9a-f]+/,className:"number"},e.NUMBER_MODE,t] - },l=e.SHEBANG({binary:"(fish|bash|zsh|sh|csh|ksh|tcsh|dash|scsh)",relevance:10 - }),c={className:"function",begin:/\w[\w\d_]*\s*\(\s*\)\s*\{/,returnBegin:!0, - contains:[e.inherit(e.TITLE_MODE,{begin:/\w[\w\d_]*/})],relevance:0};return{ - name:"Bash",aliases:["sh"],keywords:{$pattern:/\b[a-z][a-z0-9._-]+\b/, - keyword:["if","then","else","elif","fi","for","while","until","in","do","done","case","esac","function","select"], - literal:["true","false"], - 
built_in:["break","cd","continue","eval","exec","exit","export","getopts","hash","pwd","readonly","return","shift","test","times","trap","umask","unset","alias","bind","builtin","caller","command","declare","echo","enable","help","let","local","logout","mapfile","printf","read","readarray","source","type","typeset","ulimit","unalias","set","shopt","autoload","bg","bindkey","bye","cap","chdir","clone","comparguments","compcall","compctl","compdescribe","compfiles","compgroups","compquote","comptags","comptry","compvalues","dirs","disable","disown","echotc","echoti","emulate","fc","fg","float","functions","getcap","getln","history","integer","jobs","kill","limit","log","noglob","popd","print","pushd","pushln","rehash","sched","setcap","setopt","stat","suspend","ttyctl","unfunction","unhash","unlimit","unsetopt","vared","wait","whence","where","which","zcompile","zformat","zftp","zle","zmodload","zparseopts","zprof","zpty","zregexparse","zsocket","zstyle","ztcp","chcon","chgrp","chown","chmod","cp","dd","df","dir","dircolors","ln","ls","mkdir","mkfifo","mknod","mktemp","mv","realpath","rm","rmdir","shred","sync","touch","truncate","vdir","b2sum","base32","base64","cat","cksum","comm","csplit","cut","expand","fmt","fold","head","join","md5sum","nl","numfmt","od","paste","ptx","pr","sha1sum","sha224sum","sha256sum","sha384sum","sha512sum","shuf","sort","split","sum","tac","tail","tr","tsort","unexpand","uniq","wc","arch","basename","chroot","date","dirname","du","echo","env","expr","factor","groups","hostid","id","link","logname","nice","nohup","nproc","pathchk","pinky","printenv","printf","pwd","readlink","runcon","seq","sleep","stat","stdbuf","stty","tee","test","timeout","tty","uname","unlink","uptime","users","who","whoami","yes"] - },contains:[l,e.SHEBANG(),c,o,e.HASH_COMMENT_MODE,r,{match:/(\/[a-z._-]+)+/},s,{ - match:/\\"/},{className:"string",begin:/'/,end:/'/},{match:/\\'/},t]}}, - grmr_c:e=>{const n=e.regex,t=e.COMMENT("//","$",{contains:[{begin:/\\\n/}] - }),a="decltype\\(auto\\)",i="[a-zA-Z_]\\w*::",r="("+a+"|"+n.optional(i)+"[a-zA-Z_]\\w*"+n.optional("<[^<>]+>")+")",s={ - className:"type",variants:[{begin:"\\b[a-z\\d_]*_t\\b"},{ - match:/\batomic_[a-z]{3,6}\b/}]},o={className:"string",variants:[{ - begin:'(u8?|U|L)?"',end:'"',illegal:"\\n",contains:[e.BACKSLASH_ESCAPE]},{ - begin:"(u8?|U|L)?'(\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4,8}|[0-7]{3}|\\S)|.)", - end:"'",illegal:"."},e.END_SAME_AS_BEGIN({ - begin:/(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/,end:/\)([^()\\ ]{0,16})"/})]},l={ - className:"number",variants:[{begin:"\\b(0b[01']+)"},{ - begin:"(-?)\\b([\\d']+(\\.[\\d']*)?|\\.[\\d']+)((ll|LL|l|L)(u|U)?|(u|U)(ll|LL|l|L)?|f|F|b|B)" - },{ - begin:"(-?)(\\b0[xX][a-fA-F0-9']+|(\\b[\\d']+(\\.[\\d']*)?|\\.[\\d']+)([eE][-+]?[\\d']+)?)" - }],relevance:0},c={className:"meta",begin:/#\s*[a-z]+\b/,end:/$/,keywords:{ - keyword:"if else elif endif define undef warning error line pragma _Pragma ifdef ifndef include" - },contains:[{begin:/\\\n/,relevance:0},e.inherit(o,{className:"string"}),{ - className:"string",begin:/<.*?>/},t,e.C_BLOCK_COMMENT_MODE]},d={ - className:"title",begin:n.optional(i)+e.IDENT_RE,relevance:0 - },g=n.optional(i)+e.IDENT_RE+"\\s*\\(",u={ - 
keyword:["asm","auto","break","case","continue","default","do","else","enum","extern","for","fortran","goto","if","inline","register","restrict","return","sizeof","struct","switch","typedef","union","volatile","while","_Alignas","_Alignof","_Atomic","_Generic","_Noreturn","_Static_assert","_Thread_local","alignas","alignof","noreturn","static_assert","thread_local","_Pragma"], - type:["float","double","signed","unsigned","int","short","long","char","void","_Bool","_Complex","_Imaginary","_Decimal32","_Decimal64","_Decimal128","const","static","complex","bool","imaginary"], - literal:"true false NULL", - built_in:"std string wstring cin cout cerr clog stdin stdout stderr stringstream istringstream ostringstream auto_ptr deque list queue stack vector map set pair bitset multiset multimap unordered_set unordered_map unordered_multiset unordered_multimap priority_queue make_pair array shared_ptr abort terminate abs acos asin atan2 atan calloc ceil cosh cos exit exp fabs floor fmod fprintf fputs free frexp fscanf future isalnum isalpha iscntrl isdigit isgraph islower isprint ispunct isspace isupper isxdigit tolower toupper labs ldexp log10 log malloc realloc memchr memcmp memcpy memset modf pow printf putchar puts scanf sinh sin snprintf sprintf sqrt sscanf strcat strchr strcmp strcpy strcspn strlen strncat strncmp strncpy strpbrk strrchr strspn strstr tanh tan vfprintf vprintf vsprintf endl initializer_list unique_ptr" - },b=[c,s,t,e.C_BLOCK_COMMENT_MODE,l,o],m={variants:[{begin:/=/,end:/;/},{ - begin:/\(/,end:/\)/},{beginKeywords:"new throw return else",end:/;/}], - keywords:u,contains:b.concat([{begin:/\(/,end:/\)/,keywords:u, - contains:b.concat(["self"]),relevance:0}]),relevance:0},p={ - begin:"("+r+"[\\*&\\s]+)+"+g,returnBegin:!0,end:/[{;=]/,excludeEnd:!0, - keywords:u,illegal:/[^\w\s\*&:<>.]/,contains:[{begin:a,keywords:u,relevance:0},{ - begin:g,returnBegin:!0,contains:[e.inherit(d,{className:"title.function"})], - relevance:0},{relevance:0,match:/,/},{className:"params",begin:/\(/,end:/\)/, - keywords:u,relevance:0,contains:[t,e.C_BLOCK_COMMENT_MODE,o,l,s,{begin:/\(/, - end:/\)/,keywords:u,relevance:0,contains:["self",t,e.C_BLOCK_COMMENT_MODE,o,l,s] - }]},s,t,e.C_BLOCK_COMMENT_MODE,c]};return{name:"C",aliases:["h"],keywords:u, - disableAutodetect:!0,illegal:"=]/,contains:[{ - beginKeywords:"final class struct"},e.TITLE_MODE]}]),exports:{preprocessor:c, - strings:o,keywords:u}}},grmr_cpp:e=>{const n=e.regex,t=e.COMMENT("//","$",{ - contains:[{begin:/\\\n/}] - }),a="decltype\\(auto\\)",i="[a-zA-Z_]\\w*::",r="(?!struct)("+a+"|"+n.optional(i)+"[a-zA-Z_]\\w*"+n.optional("<[^<>]+>")+")",s={ - className:"type",begin:"\\b[a-z\\d_]*_t\\b"},o={className:"string",variants:[{ - begin:'(u8?|U|L)?"',end:'"',illegal:"\\n",contains:[e.BACKSLASH_ESCAPE]},{ - begin:"(u8?|U|L)?'(\\\\(x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4,8}|[0-7]{3}|\\S)|.)", - end:"'",illegal:"."},e.END_SAME_AS_BEGIN({ - begin:/(?:u8?|U|L)?R"([^()\\ ]{0,16})\(/,end:/\)([^()\\ ]{0,16})"/})]},l={ - className:"number",variants:[{begin:"\\b(0b[01']+)"},{ - begin:"(-?)\\b([\\d']+(\\.[\\d']*)?|\\.[\\d']+)((ll|LL|l|L)(u|U)?|(u|U)(ll|LL|l|L)?|f|F|b|B)" - },{ - begin:"(-?)(\\b0[xX][a-fA-F0-9']+|(\\b[\\d']+(\\.[\\d']*)?|\\.[\\d']+)([eE][-+]?[\\d']+)?)" - }],relevance:0},c={className:"meta",begin:/#\s*[a-z]+\b/,end:/$/,keywords:{ - keyword:"if else elif endif define undef warning error line pragma _Pragma ifdef ifndef include" - },contains:[{begin:/\\\n/,relevance:0},e.inherit(o,{className:"string"}),{ - 
className:"string",begin:/<.*?>/},t,e.C_BLOCK_COMMENT_MODE]},d={ - className:"title",begin:n.optional(i)+e.IDENT_RE,relevance:0 - },g=n.optional(i)+e.IDENT_RE+"\\s*\\(",u={ - type:["bool","char","char16_t","char32_t","char8_t","double","float","int","long","short","void","wchar_t","unsigned","signed","const","static"], - keyword:["alignas","alignof","and","and_eq","asm","atomic_cancel","atomic_commit","atomic_noexcept","auto","bitand","bitor","break","case","catch","class","co_await","co_return","co_yield","compl","concept","const_cast|10","consteval","constexpr","constinit","continue","decltype","default","delete","do","dynamic_cast|10","else","enum","explicit","export","extern","false","final","for","friend","goto","if","import","inline","module","mutable","namespace","new","noexcept","not","not_eq","nullptr","operator","or","or_eq","override","private","protected","public","reflexpr","register","reinterpret_cast|10","requires","return","sizeof","static_assert","static_cast|10","struct","switch","synchronized","template","this","thread_local","throw","transaction_safe","transaction_safe_dynamic","true","try","typedef","typeid","typename","union","using","virtual","volatile","while","xor","xor_eq"], - literal:["NULL","false","nullopt","nullptr","true"],built_in:["_Pragma"], - _type_hints:["any","auto_ptr","barrier","binary_semaphore","bitset","complex","condition_variable","condition_variable_any","counting_semaphore","deque","false_type","future","imaginary","initializer_list","istringstream","jthread","latch","lock_guard","multimap","multiset","mutex","optional","ostringstream","packaged_task","pair","promise","priority_queue","queue","recursive_mutex","recursive_timed_mutex","scoped_lock","set","shared_future","shared_lock","shared_mutex","shared_timed_mutex","shared_ptr","stack","string_view","stringstream","timed_mutex","thread","true_type","tuple","unique_lock","unique_ptr","unordered_map","unordered_multimap","unordered_multiset","unordered_set","variant","vector","weak_ptr","wstring","wstring_view"] - },b={className:"function.dispatch",relevance:0,keywords:{ - _hint:["abort","abs","acos","apply","as_const","asin","atan","atan2","calloc","ceil","cerr","cin","clog","cos","cosh","cout","declval","endl","exchange","exit","exp","fabs","floor","fmod","forward","fprintf","fputs","free","frexp","fscanf","future","invoke","isalnum","isalpha","iscntrl","isdigit","isgraph","islower","isprint","ispunct","isspace","isupper","isxdigit","labs","launder","ldexp","log","log10","make_pair","make_shared","make_shared_for_overwrite","make_tuple","make_unique","malloc","memchr","memcmp","memcpy","memset","modf","move","pow","printf","putchar","puts","realloc","scanf","sin","sinh","snprintf","sprintf","sqrt","sscanf","std","stderr","stdin","stdout","strcat","strchr","strcmp","strcpy","strcspn","strlen","strncat","strncmp","strncpy","strpbrk","strrchr","strspn","strstr","swap","tan","tanh","terminate","to_underlying","tolower","toupper","vfprintf","visit","vprintf","vsprintf"] - }, - begin:n.concat(/\b/,/(?!decltype)/,/(?!if)/,/(?!for)/,/(?!switch)/,/(?!while)/,e.IDENT_RE,n.lookahead(/(<[^<>]+>|)\s*\(/)) - },m=[b,c,s,t,e.C_BLOCK_COMMENT_MODE,l,o],p={variants:[{begin:/=/,end:/;/},{ - begin:/\(/,end:/\)/},{beginKeywords:"new throw return else",end:/;/}], - keywords:u,contains:m.concat([{begin:/\(/,end:/\)/,keywords:u, - contains:m.concat(["self"]),relevance:0}]),relevance:0},_={className:"function", - begin:"("+r+"[\\*&\\s]+)+"+g,returnBegin:!0,end:/[{;=]/,excludeEnd:!0, - 
keywords:u,illegal:/[^\w\s\*&:<>.]/,contains:[{begin:a,keywords:u,relevance:0},{ - begin:g,returnBegin:!0,contains:[d],relevance:0},{begin:/::/,relevance:0},{ - begin:/:/,endsWithParent:!0,contains:[o,l]},{relevance:0,match:/,/},{ - className:"params",begin:/\(/,end:/\)/,keywords:u,relevance:0, - contains:[t,e.C_BLOCK_COMMENT_MODE,o,l,s,{begin:/\(/,end:/\)/,keywords:u, - relevance:0,contains:["self",t,e.C_BLOCK_COMMENT_MODE,o,l,s]}] - },s,t,e.C_BLOCK_COMMENT_MODE,c]};return{name:"C++", - aliases:["cc","c++","h++","hpp","hh","hxx","cxx"],keywords:u,illegal:"",keywords:u,contains:["self",s]},{begin:e.IDENT_RE+"::",keywords:u},{ - match:[/\b(?:enum(?:\s+(?:class|struct))?|class|struct|union)/,/\s+/,/\w+/], - className:{1:"keyword",3:"title.class"}}])}},grmr_csharp:e=>{const n={ - keyword:["abstract","as","base","break","case","catch","class","const","continue","do","else","event","explicit","extern","finally","fixed","for","foreach","goto","if","implicit","in","interface","internal","is","lock","namespace","new","operator","out","override","params","private","protected","public","readonly","record","ref","return","scoped","sealed","sizeof","stackalloc","static","struct","switch","this","throw","try","typeof","unchecked","unsafe","using","virtual","void","volatile","while"].concat(["add","alias","and","ascending","async","await","by","descending","equals","from","get","global","group","init","into","join","let","nameof","not","notnull","on","or","orderby","partial","remove","select","set","unmanaged","value|0","var","when","where","with","yield"]), - built_in:["bool","byte","char","decimal","delegate","double","dynamic","enum","float","int","long","nint","nuint","object","sbyte","short","string","ulong","uint","ushort"], - literal:["default","false","null","true"]},t=e.inherit(e.TITLE_MODE,{ - begin:"[a-zA-Z](\\.?\\w)*"}),a={className:"number",variants:[{ - begin:"\\b(0b[01']+)"},{ - begin:"(-?)\\b([\\d']+(\\.[\\d']*)?|\\.[\\d']+)(u|U|l|L|ul|UL|f|F|b|B)"},{ - begin:"(-?)(\\b0[xX][a-fA-F0-9']+|(\\b[\\d']+(\\.[\\d']*)?|\\.[\\d']+)([eE][-+]?[\\d']+)?)" - }],relevance:0},i={className:"string",begin:'@"',end:'"',contains:[{begin:'""'}] - },r=e.inherit(i,{illegal:/\n/}),s={className:"subst",begin:/\{/,end:/\}/, - keywords:n},o=e.inherit(s,{illegal:/\n/}),l={className:"string",begin:/\$"/, - end:'"',illegal:/\n/,contains:[{begin:/\{\{/},{begin:/\}\}/ - },e.BACKSLASH_ESCAPE,o]},c={className:"string",begin:/\$@"/,end:'"',contains:[{ - begin:/\{\{/},{begin:/\}\}/},{begin:'""'},s]},d=e.inherit(c,{illegal:/\n/, - contains:[{begin:/\{\{/},{begin:/\}\}/},{begin:'""'},o]}) - ;s.contains=[c,l,i,e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,a,e.C_BLOCK_COMMENT_MODE], - o.contains=[d,l,r,e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,a,e.inherit(e.C_BLOCK_COMMENT_MODE,{ - illegal:/\n/})];const g={variants:[c,l,i,e.APOS_STRING_MODE,e.QUOTE_STRING_MODE] - },u={begin:"<",end:">",contains:[{beginKeywords:"in out"},t] - },b=e.IDENT_RE+"(<"+e.IDENT_RE+"(\\s*,\\s*"+e.IDENT_RE+")*>)?(\\[\\])?",m={ - begin:"@"+e.IDENT_RE,relevance:0};return{name:"C#",aliases:["cs","c#"], - keywords:n,illegal:/::/,contains:[e.COMMENT("///","$",{returnBegin:!0, - contains:[{className:"doctag",variants:[{begin:"///",relevance:0},{ - begin:"\x3c!--|--\x3e"},{begin:""}]}] - }),e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,{className:"meta",begin:"#", - end:"$",keywords:{ - keyword:"if else elif endif define undef warning error line region endregion pragma checksum" - }},g,a,{beginKeywords:"class interface",relevance:0,end:/[{;=]/, - 
illegal:/[^\s:,]/,contains:[{beginKeywords:"where class" - },t,u,e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},{beginKeywords:"namespace", - relevance:0,end:/[{;=]/,illegal:/[^\s:]/, - contains:[t,e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},{ - beginKeywords:"record",relevance:0,end:/[{;=]/,illegal:/[^\s:]/, - contains:[t,u,e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},{className:"meta", - begin:"^\\s*\\[(?=[\\w])",excludeBegin:!0,end:"\\]",excludeEnd:!0,contains:[{ - className:"string",begin:/"/,end:/"/}]},{ - beginKeywords:"new return throw await else",relevance:0},{className:"function", - begin:"("+b+"\\s+)+"+e.IDENT_RE+"\\s*(<[^=]+>\\s*)?\\(",returnBegin:!0, - end:/\s*[{;=]/,excludeEnd:!0,keywords:n,contains:[{ - beginKeywords:"public private protected static internal protected abstract async extern override unsafe virtual new sealed partial", - relevance:0},{begin:e.IDENT_RE+"\\s*(<[^=]+>\\s*)?\\(",returnBegin:!0, - contains:[e.TITLE_MODE,u],relevance:0},{match:/\(\)/},{className:"params", - begin:/\(/,end:/\)/,excludeBegin:!0,excludeEnd:!0,keywords:n,relevance:0, - contains:[g,a,e.C_BLOCK_COMMENT_MODE] - },e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},m]}},grmr_css:e=>{ - const n=e.regex,t=ie(e),a=[e.APOS_STRING_MODE,e.QUOTE_STRING_MODE];return{ - name:"CSS",case_insensitive:!0,illegal:/[=|'\$]/,keywords:{ - keyframePosition:"from to"},classNameAliases:{keyframePosition:"selector-tag"}, - contains:[t.BLOCK_COMMENT,{begin:/-(webkit|moz|ms|o)-(?=[a-z])/ - },t.CSS_NUMBER_MODE,{className:"selector-id",begin:/#[A-Za-z0-9_-]+/,relevance:0 - },{className:"selector-class",begin:"\\.[a-zA-Z-][a-zA-Z0-9_-]*",relevance:0 - },t.ATTRIBUTE_SELECTOR_MODE,{className:"selector-pseudo",variants:[{ - begin:":("+oe.join("|")+")"},{begin:":(:)?("+le.join("|")+")"}] - },t.CSS_VARIABLE,{className:"attribute",begin:"\\b("+ce.join("|")+")\\b"},{ - begin:/:/,end:/[;}{]/, - contains:[t.BLOCK_COMMENT,t.HEXCOLOR,t.IMPORTANT,t.CSS_NUMBER_MODE,...a,{ - begin:/(url|data-uri)\(/,end:/\)/,relevance:0,keywords:{built_in:"url data-uri" - },contains:[...a,{className:"string",begin:/[^)]/,endsWithParent:!0, - excludeEnd:!0}]},t.FUNCTION_DISPATCH]},{begin:n.lookahead(/@/),end:"[{;]", - relevance:0,illegal:/:/,contains:[{className:"keyword",begin:/@-?\w[\w]*(-\w+)*/ - },{begin:/\s/,endsWithParent:!0,excludeEnd:!0,relevance:0,keywords:{ - $pattern:/[a-z-]+/,keyword:"and or not only",attribute:se.join(" ")},contains:[{ - begin:/[a-z-]+(?=:)/,className:"attribute"},...a,t.CSS_NUMBER_MODE]}]},{ - className:"selector-tag",begin:"\\b("+re.join("|")+")\\b"}]}},grmr_diff:e=>{ - const n=e.regex;return{name:"Diff",aliases:["patch"],contains:[{ - className:"meta",relevance:10, - match:n.either(/^@@ +-\d+,\d+ +\+\d+,\d+ +@@/,/^\*\*\* +\d+,\d+ +\*\*\*\*$/,/^--- +\d+,\d+ +----$/) - },{className:"comment",variants:[{ - begin:n.either(/Index: /,/^index/,/={3,}/,/^-{3}/,/^\*{3} /,/^\+{3}/,/^diff --git/), - end:/$/},{match:/^\*{15}$/}]},{className:"addition",begin:/^\+/,end:/$/},{ - className:"deletion",begin:/^-/,end:/$/},{className:"addition",begin:/^!/, - end:/$/}]}},grmr_go:e=>{const n={ - keyword:["break","case","chan","const","continue","default","defer","else","fallthrough","for","func","go","goto","if","import","interface","map","package","range","return","select","struct","switch","type","var"], - type:["bool","byte","complex64","complex128","error","float32","float64","int8","int16","int32","int64","string","uint8","uint16","uint32","uint64","int","uint","uintptr","rune"], - literal:["true","false","iota","nil"], - 
built_in:["append","cap","close","complex","copy","imag","len","make","new","panic","print","println","real","recover","delete"] - };return{name:"Go",aliases:["golang"],keywords:n,illegal:"{const n=e.regex;return{name:"GraphQL",aliases:["gql"], - case_insensitive:!0,disableAutodetect:!1,keywords:{ - keyword:["query","mutation","subscription","type","input","schema","directive","interface","union","scalar","fragment","enum","on"], - literal:["true","false","null"]}, - contains:[e.HASH_COMMENT_MODE,e.QUOTE_STRING_MODE,e.NUMBER_MODE,{ - scope:"punctuation",match:/[.]{3}/,relevance:0},{scope:"punctuation", - begin:/[\!\(\)\:\=\[\]\{\|\}]{1}/,relevance:0},{scope:"variable",begin:/\$/, - end:/\W/,excludeEnd:!0,relevance:0},{scope:"meta",match:/@\w+/,excludeEnd:!0},{ - scope:"symbol",begin:n.concat(/[_A-Za-z][_0-9A-Za-z]*/,n.lookahead(/\s*:/)), - relevance:0}],illegal:[/[;<']/,/BEGIN/]}},grmr_ini:e=>{const n=e.regex,t={ - className:"number",relevance:0,variants:[{begin:/([+-]+)?[\d]+_[\d_]+/},{ - begin:e.NUMBER_RE}]},a=e.COMMENT();a.variants=[{begin:/;/,end:/$/},{begin:/#/, - end:/$/}];const i={className:"variable",variants:[{begin:/\$[\w\d"][\w\d_]*/},{ - begin:/\$\{(.*?)\}/}]},r={className:"literal", - begin:/\bon|off|true|false|yes|no\b/},s={className:"string", - contains:[e.BACKSLASH_ESCAPE],variants:[{begin:"'''",end:"'''",relevance:10},{ - begin:'"""',end:'"""',relevance:10},{begin:'"',end:'"'},{begin:"'",end:"'"}] - },o={begin:/\[/,end:/\]/,contains:[a,r,i,s,t,"self"],relevance:0 - },l=n.either(/[A-Za-z0-9_-]+/,/"(\\"|[^"])*"/,/'[^']*'/);return{ - name:"TOML, also INI",aliases:["toml"],case_insensitive:!0,illegal:/\S/, - contains:[a,{className:"section",begin:/\[+/,end:/\]+/},{ - begin:n.concat(l,"(\\s*\\.\\s*",l,")*",n.lookahead(/\s*=\s*[^#\s]/)), - className:"attr",starts:{end:/$/,contains:[a,o,r,i,s,t]}}]}},grmr_java:e=>{ - const n=e.regex,t="[\xc0-\u02b8a-zA-Z_$][\xc0-\u02b8a-zA-Z_$0-9]*",a=t+pe("(?:<"+t+"~~~(?:\\s*,\\s*"+t+"~~~)*>)?",/~~~/g,2),i={ - keyword:["synchronized","abstract","private","var","static","if","const ","for","while","strictfp","finally","protected","import","native","final","void","enum","else","break","transient","catch","instanceof","volatile","case","assert","package","default","public","try","switch","continue","throws","protected","public","private","module","requires","exports","do","sealed","yield","permits"], - literal:["false","true","null"], - type:["char","boolean","long","float","int","byte","short","double"], - built_in:["super","this"]},r={className:"meta",begin:"@"+t,contains:[{ - begin:/\(/,end:/\)/,contains:["self"]}]},s={className:"params",begin:/\(/, - end:/\)/,keywords:i,relevance:0,contains:[e.C_BLOCK_COMMENT_MODE],endsParent:!0} - ;return{name:"Java",aliases:["jsp"],keywords:i,illegal:/<\/|#/, - contains:[e.COMMENT("/\\*\\*","\\*/",{relevance:0,contains:[{begin:/\w+@/, - relevance:0},{className:"doctag",begin:"@[A-Za-z]+"}]}),{ - begin:/import java\.[a-z]+\./,keywords:"import",relevance:2 - },e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,{begin:/"""/,end:/"""/, - className:"string",contains:[e.BACKSLASH_ESCAPE] - },e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,{ - match:[/\b(?:class|interface|enum|extends|implements|new)/,/\s+/,t],className:{ - 1:"keyword",3:"title.class"}},{match:/non-sealed/,scope:"keyword"},{ - begin:[n.concat(/(?!else)/,t),/\s+/,t,/\s+/,/=(?!=)/],className:{1:"type", - 3:"variable",5:"operator"}},{begin:[/record/,/\s+/,t],className:{1:"keyword", - 3:"title.class"},contains:[s,e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},{ - 
beginKeywords:"new throw return else",relevance:0},{ - begin:["(?:"+a+"\\s+)",e.UNDERSCORE_IDENT_RE,/\s*(?=\()/],className:{ - 2:"title.function"},keywords:i,contains:[{className:"params",begin:/\(/, - end:/\)/,keywords:i,relevance:0, - contains:[r,e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,me,e.C_BLOCK_COMMENT_MODE] - },e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},me,r]}},grmr_javascript:Oe, - grmr_json:e=>{const n=["true","false","null"],t={scope:"literal", - beginKeywords:n.join(" ")};return{name:"JSON",keywords:{literal:n},contains:[{ - className:"attr",begin:/"(\\.|[^\\"\r\n])*"(?=\s*:)/,relevance:1.01},{ - match:/[{}[\],:]/,className:"punctuation",relevance:0 - },e.QUOTE_STRING_MODE,t,e.C_NUMBER_MODE,e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE], - illegal:"\\S"}},grmr_kotlin:e=>{const n={ - keyword:"abstract as val var vararg get set class object open private protected public noinline crossinline dynamic final enum if else do while for when throw try catch finally import package is in fun override companion reified inline lateinit init interface annotation data sealed internal infix operator out by constructor super tailrec where const inner suspend typealias external expect actual", - built_in:"Byte Short Char Int Long Boolean Float Double Void Unit Nothing", - literal:"true false null"},t={className:"symbol",begin:e.UNDERSCORE_IDENT_RE+"@" - },a={className:"subst",begin:/\$\{/,end:/\}/,contains:[e.C_NUMBER_MODE]},i={ - className:"variable",begin:"\\$"+e.UNDERSCORE_IDENT_RE},r={className:"string", - variants:[{begin:'"""',end:'"""(?=[^"])',contains:[i,a]},{begin:"'",end:"'", - illegal:/\n/,contains:[e.BACKSLASH_ESCAPE]},{begin:'"',end:'"',illegal:/\n/, - contains:[e.BACKSLASH_ESCAPE,i,a]}]};a.contains.push(r);const s={ - className:"meta", - begin:"@(?:file|property|field|get|set|receiver|param|setparam|delegate)\\s*:(?:\\s*"+e.UNDERSCORE_IDENT_RE+")?" 
- },o={className:"meta",begin:"@"+e.UNDERSCORE_IDENT_RE,contains:[{begin:/\(/, - end:/\)/,contains:[e.inherit(r,{className:"string"}),"self"]}] - },l=me,c=e.COMMENT("/\\*","\\*/",{contains:[e.C_BLOCK_COMMENT_MODE]}),d={ - variants:[{className:"type",begin:e.UNDERSCORE_IDENT_RE},{begin:/\(/,end:/\)/, - contains:[]}]},g=d;return g.variants[1].contains=[d],d.variants[1].contains=[g], - {name:"Kotlin",aliases:["kt","kts"],keywords:n, - contains:[e.COMMENT("/\\*\\*","\\*/",{relevance:0,contains:[{className:"doctag", - begin:"@[A-Za-z]+"}]}),e.C_LINE_COMMENT_MODE,c,{className:"keyword", - begin:/\b(break|continue|return|this)\b/,starts:{contains:[{className:"symbol", - begin:/@\w+/}]}},t,s,o,{className:"function",beginKeywords:"fun",end:"[(]|$", - returnBegin:!0,excludeEnd:!0,keywords:n,relevance:5,contains:[{ - begin:e.UNDERSCORE_IDENT_RE+"\\s*\\(",returnBegin:!0,relevance:0, - contains:[e.UNDERSCORE_TITLE_MODE]},{className:"type",begin://, - keywords:"reified",relevance:0},{className:"params",begin:/\(/,end:/\)/, - endsParent:!0,keywords:n,relevance:0,contains:[{begin:/:/,end:/[=,\/]/, - endsWithParent:!0,contains:[d,e.C_LINE_COMMENT_MODE,c],relevance:0 - },e.C_LINE_COMMENT_MODE,c,s,o,r,e.C_NUMBER_MODE]},c]},{ - begin:[/class|interface|trait/,/\s+/,e.UNDERSCORE_IDENT_RE],beginScope:{ - 3:"title.class"},keywords:"class interface trait",end:/[:\{(]|$/,excludeEnd:!0, - illegal:"extends implements",contains:[{ - beginKeywords:"public protected internal private constructor" - },e.UNDERSCORE_TITLE_MODE,{className:"type",begin://,excludeBegin:!0, - excludeEnd:!0,relevance:0},{className:"type",begin:/[,:]\s*/,end:/[<\(,){\s]|$/, - excludeBegin:!0,returnEnd:!0},s,o]},r,{className:"meta",begin:"^#!/usr/bin/env", - end:"$",illegal:"\n"},l]}},grmr_less:e=>{ - const n=ie(e),t=de,a="[\\w-]+",i="("+a+"|@\\{"+a+"\\})",r=[],s=[],o=e=>({ - className:"string",begin:"~?"+e+".*?"+e}),l=(e,n,t)=>({className:e,begin:n, - relevance:t}),c={$pattern:/[a-z-]+/,keyword:"and or not only", - attribute:se.join(" ")},d={begin:"\\(",end:"\\)",contains:s,keywords:c, - relevance:0} - ;s.push(e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,o("'"),o('"'),n.CSS_NUMBER_MODE,{ - begin:"(url|data-uri)\\(",starts:{className:"string",end:"[\\)\\n]", - excludeEnd:!0} - },n.HEXCOLOR,d,l("variable","@@?"+a,10),l("variable","@\\{"+a+"\\}"),l("built_in","~?`[^`]*?`"),{ - className:"attribute",begin:a+"\\s*:",end:":",returnBegin:!0,excludeEnd:!0 - },n.IMPORTANT,{beginKeywords:"and not"},n.FUNCTION_DISPATCH);const g=s.concat({ - begin:/\{/,end:/\}/,contains:r}),u={beginKeywords:"when",endsWithParent:!0, - contains:[{beginKeywords:"and not"}].concat(s)},b={begin:i+"\\s*:", - returnBegin:!0,end:/[;}]/,relevance:0,contains:[{begin:/-(webkit|moz|ms|o)-/ - },n.CSS_VARIABLE,{className:"attribute",begin:"\\b("+ce.join("|")+")\\b", - end:/(?=:)/,starts:{endsWithParent:!0,illegal:"[<=$]",relevance:0,contains:s}}] - },m={className:"keyword", - begin:"@(import|media|charset|font-face|(-[a-z]+-)?keyframes|supports|document|namespace|page|viewport|host)\\b", - starts:{end:"[;{}]",keywords:c,returnEnd:!0,contains:s,relevance:0}},p={ - className:"variable",variants:[{begin:"@"+a+"\\s*:",relevance:15},{begin:"@"+a - }],starts:{end:"[;}]",returnEnd:!0,contains:g}},_={variants:[{ - begin:"[\\.#:&\\[>]",end:"[;{}]"},{begin:i,end:/\{/}],returnBegin:!0, - returnEnd:!0,illegal:"[<='$\"]",relevance:0, - contains:[e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,u,l("keyword","all\\b"),l("variable","@\\{"+a+"\\}"),{ - 
begin:"\\b("+re.join("|")+")\\b",className:"selector-tag" - },n.CSS_NUMBER_MODE,l("selector-tag",i,0),l("selector-id","#"+i),l("selector-class","\\."+i,0),l("selector-tag","&",0),n.ATTRIBUTE_SELECTOR_MODE,{ - className:"selector-pseudo",begin:":("+oe.join("|")+")"},{ - className:"selector-pseudo",begin:":(:)?("+le.join("|")+")"},{begin:/\(/, - end:/\)/,relevance:0,contains:g},{begin:"!important"},n.FUNCTION_DISPATCH]},h={ - begin:a+":(:)?"+`(${t.join("|")})`,returnBegin:!0,contains:[_]} - ;return r.push(e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,m,p,h,b,_,u,n.FUNCTION_DISPATCH), - {name:"Less",case_insensitive:!0,illegal:"[=>'/<($\"]",contains:r}}, - grmr_lua:e=>{const n="\\[=*\\[",t="\\]=*\\]",a={begin:n,end:t,contains:["self"] - },i=[e.COMMENT("--(?!"+n+")","$"),e.COMMENT("--"+n,t,{contains:[a],relevance:10 - })];return{name:"Lua",keywords:{$pattern:e.UNDERSCORE_IDENT_RE, - literal:"true false nil", - keyword:"and break do else elseif end for goto if in local not or repeat return then until while", - built_in:"_G _ENV _VERSION __index __newindex __mode __call __metatable __tostring __len __gc __add __sub __mul __div __mod __pow __concat __unm __eq __lt __le assert collectgarbage dofile error getfenv getmetatable ipairs load loadfile loadstring module next pairs pcall print rawequal rawget rawset require select setfenv setmetatable tonumber tostring type unpack xpcall arg self coroutine resume yield status wrap create running debug getupvalue debug sethook getmetatable gethook setmetatable setlocal traceback setfenv getinfo setupvalue getlocal getregistry getfenv io lines write close flush open output type read stderr stdin input stdout popen tmpfile math log max acos huge ldexp pi cos tanh pow deg tan cosh sinh random randomseed frexp ceil floor rad abs sqrt modf asin min mod fmod log10 atan2 exp sin atan os exit setlocale date getenv difftime remove time clock tmpname rename execute package preload loadlib loaded loaders cpath config path seeall string sub upper len gfind rep find match char dump gmatch reverse byte format gsub lower table setn insert getn foreachi maxn foreach concat sort remove" - },contains:i.concat([{className:"function",beginKeywords:"function",end:"\\)", - contains:[e.inherit(e.TITLE_MODE,{ - begin:"([_a-zA-Z]\\w*\\.)*([_a-zA-Z]\\w*:)?[_a-zA-Z]\\w*"}),{className:"params", - begin:"\\(",endsWithParent:!0,contains:i}].concat(i) - },e.C_NUMBER_MODE,e.APOS_STRING_MODE,e.QUOTE_STRING_MODE,{className:"string", - begin:n,end:t,contains:[a],relevance:5}])}},grmr_makefile:e=>{const n={ - className:"variable",variants:[{begin:"\\$\\("+e.UNDERSCORE_IDENT_RE+"\\)", - contains:[e.BACKSLASH_ESCAPE]},{begin:/\$[@%{ - const n={begin:/<\/?[A-Za-z_]/,end:">",subLanguage:"xml",relevance:0},t={ - variants:[{begin:/\[.+?\]\[.*?\]/,relevance:0},{ - begin:/\[.+?\]\(((data|javascript|mailto):|(?:http|ftp)s?:\/\/).*?\)/, - relevance:2},{ - begin:e.regex.concat(/\[.+?\]\(/,/[A-Za-z][A-Za-z0-9+.-]*/,/:\/\/.*?\)/), - relevance:2},{begin:/\[.+?\]\([./?&#].*?\)/,relevance:1},{ - begin:/\[.*?\]\(.*?\)/,relevance:0}],returnBegin:!0,contains:[{match:/\[(?=\])/ - },{className:"string",relevance:0,begin:"\\[",end:"\\]",excludeBegin:!0, - returnEnd:!0},{className:"link",relevance:0,begin:"\\]\\(",end:"\\)", - excludeBegin:!0,excludeEnd:!0},{className:"symbol",relevance:0,begin:"\\]\\[", - end:"\\]",excludeBegin:!0,excludeEnd:!0}]},a={className:"strong",contains:[], - variants:[{begin:/_{2}(?!\s)/,end:/_{2}/},{begin:/\*{2}(?!\s)/,end:/\*{2}/}] - 
},i={className:"emphasis",contains:[],variants:[{begin:/\*(?![*\s])/,end:/\*/},{ - begin:/_(?![_\s])/,end:/_/,relevance:0}]},r=e.inherit(a,{contains:[] - }),s=e.inherit(i,{contains:[]});a.contains.push(s),i.contains.push(r) - ;let o=[n,t];return[a,i,r,s].forEach((e=>{e.contains=e.contains.concat(o) - })),o=o.concat(a,i),{name:"Markdown",aliases:["md","mkdown","mkd"],contains:[{ - className:"section",variants:[{begin:"^#{1,6}",end:"$",contains:o},{ - begin:"(?=^.+?\\n[=-]{2,}$)",contains:[{begin:"^[=-]*$"},{begin:"^",end:"\\n", - contains:o}]}]},n,{className:"bullet",begin:"^[ \t]*([*+-]|(\\d+\\.))(?=\\s+)", - end:"\\s+",excludeEnd:!0},a,i,{className:"quote",begin:"^>\\s+",contains:o, - end:"$"},{className:"code",variants:[{begin:"(`{3,})[^`](.|\\n)*?\\1`*[ ]*"},{ - begin:"(~{3,})[^~](.|\\n)*?\\1~*[ ]*"},{begin:"```",end:"```+[ ]*$"},{ - begin:"~~~",end:"~~~+[ ]*$"},{begin:"`.+?`"},{begin:"(?=^( {4}|\\t))", - contains:[{begin:"^( {4}|\\t)",end:"(\\n)$"}],relevance:0}]},{ - begin:"^[-\\*]{3,}",end:"$"},t,{begin:/^\[[^\n]+\]:/,returnBegin:!0,contains:[{ - className:"symbol",begin:/\[/,end:/\]/,excludeBegin:!0,excludeEnd:!0},{ - className:"link",begin:/:\s*/,end:/$/,excludeBegin:!0}]}]}},grmr_objectivec:e=>{ - const n=/[a-zA-Z@][a-zA-Z0-9_]*/,t={$pattern:n, - keyword:["@interface","@class","@protocol","@implementation"]};return{ - name:"Objective-C",aliases:["mm","objc","obj-c","obj-c++","objective-c++"], - keywords:{"variable.language":["this","super"],$pattern:n, - keyword:["while","export","sizeof","typedef","const","struct","for","union","volatile","static","mutable","if","do","return","goto","enum","else","break","extern","asm","case","default","register","explicit","typename","switch","continue","inline","readonly","assign","readwrite","self","@synchronized","id","typeof","nonatomic","IBOutlet","IBAction","strong","weak","copy","in","out","inout","bycopy","byref","oneway","__strong","__weak","__block","__autoreleasing","@private","@protected","@public","@try","@property","@end","@throw","@catch","@finally","@autoreleasepool","@synthesize","@dynamic","@selector","@optional","@required","@encode","@package","@import","@defs","@compatibility_alias","__bridge","__bridge_transfer","__bridge_retained","__bridge_retain","__covariant","__contravariant","__kindof","_Nonnull","_Nullable","_Null_unspecified","__FUNCTION__","__PRETTY_FUNCTION__","__attribute__","getter","setter","retain","unsafe_unretained","nonnull","nullable","null_unspecified","null_resettable","class","instancetype","NS_DESIGNATED_INITIALIZER","NS_UNAVAILABLE","NS_REQUIRES_SUPER","NS_RETURNS_INNER_POINTER","NS_INLINE","NS_AVAILABLE","NS_DEPRECATED","NS_ENUM","NS_OPTIONS","NS_SWIFT_UNAVAILABLE","NS_ASSUME_NONNULL_BEGIN","NS_ASSUME_NONNULL_END","NS_REFINED_FOR_SWIFT","NS_SWIFT_NAME","NS_SWIFT_NOTHROW","NS_DURING","NS_HANDLER","NS_ENDHANDLER","NS_VALUERETURN","NS_VOIDRETURN"], - literal:["false","true","FALSE","TRUE","nil","YES","NO","NULL"], - built_in:["dispatch_once_t","dispatch_queue_t","dispatch_sync","dispatch_async","dispatch_once"], - type:["int","float","char","unsigned","signed","short","long","double","wchar_t","unichar","void","bool","BOOL","id|0","_Bool"] - },illegal:"/,end:/$/,illegal:"\\n" - },e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE]},{className:"class", - begin:"("+t.keyword.join("|")+")\\b",end:/(\{|$)/,excludeEnd:!0,keywords:t, - contains:[e.UNDERSCORE_TITLE_MODE]},{begin:"\\."+e.UNDERSCORE_IDENT_RE, - relevance:0}]}},grmr_perl:e=>{const n=e.regex,t=/[dualxmsipngr]{0,12}/,a={ - $pattern:/[\w.]+/, - 
keyword:"abs accept alarm and atan2 bind binmode bless break caller chdir chmod chomp chop chown chr chroot close closedir connect continue cos crypt dbmclose dbmopen defined delete die do dump each else elsif endgrent endhostent endnetent endprotoent endpwent endservent eof eval exec exists exit exp fcntl fileno flock for foreach fork format formline getc getgrent getgrgid getgrnam gethostbyaddr gethostbyname gethostent getlogin getnetbyaddr getnetbyname getnetent getpeername getpgrp getpriority getprotobyname getprotobynumber getprotoent getpwent getpwnam getpwuid getservbyname getservbyport getservent getsockname getsockopt given glob gmtime goto grep gt hex if index int ioctl join keys kill last lc lcfirst length link listen local localtime log lstat lt ma map mkdir msgctl msgget msgrcv msgsnd my ne next no not oct open opendir or ord our pack package pipe pop pos print printf prototype push q|0 qq quotemeta qw qx rand read readdir readline readlink readpipe recv redo ref rename require reset return reverse rewinddir rindex rmdir say scalar seek seekdir select semctl semget semop send setgrent sethostent setnetent setpgrp setpriority setprotoent setpwent setservent setsockopt shift shmctl shmget shmread shmwrite shutdown sin sleep socket socketpair sort splice split sprintf sqrt srand stat state study sub substr symlink syscall sysopen sysread sysseek system syswrite tell telldir tie tied time times tr truncate uc ucfirst umask undef unless unlink unpack unshift untie until use utime values vec wait waitpid wantarray warn when while write x|0 xor y|0" - },i={className:"subst",begin:"[$@]\\{",end:"\\}",keywords:a},r={begin:/->\{/, - end:/\}/},s={variants:[{begin:/\$\d/},{ - begin:n.concat(/[$%@](\^\w\b|#\w+(::\w+)*|\{\w+\}|\w+(::\w*)*)/,"(?![A-Za-z])(?![@$%])") - },{begin:/[$%@][^\s\w{]/,relevance:0}] - },o=[e.BACKSLASH_ESCAPE,i,s],l=[/!/,/\//,/\|/,/\?/,/'/,/"/,/#/],c=(e,a,i="\\1")=>{ - const r="\\1"===i?i:n.concat(i,a) - ;return n.concat(n.concat("(?:",e,")"),a,/(?:\\.|[^\\\/])*?/,r,/(?:\\.|[^\\\/])*?/,i,t) - },d=(e,a,i)=>n.concat(n.concat("(?:",e,")"),a,/(?:\\.|[^\\\/])*?/,i,t),g=[s,e.HASH_COMMENT_MODE,e.COMMENT(/^=\w/,/=cut/,{ - endsWithParent:!0}),r,{className:"string",contains:o,variants:[{ - begin:"q[qwxr]?\\s*\\(",end:"\\)",relevance:5},{begin:"q[qwxr]?\\s*\\[", - end:"\\]",relevance:5},{begin:"q[qwxr]?\\s*\\{",end:"\\}",relevance:5},{ - begin:"q[qwxr]?\\s*\\|",end:"\\|",relevance:5},{begin:"q[qwxr]?\\s*<",end:">", - relevance:5},{begin:"qw\\s+q",end:"q",relevance:5},{begin:"'",end:"'", - contains:[e.BACKSLASH_ESCAPE]},{begin:'"',end:'"'},{begin:"`",end:"`", - contains:[e.BACKSLASH_ESCAPE]},{begin:/\{\w+\}/,relevance:0},{ - begin:"-?\\w+\\s*=>",relevance:0}]},{className:"number", - begin:"(\\b0[0-7_]+)|(\\b0x[0-9a-fA-F_]+)|(\\b[1-9][0-9_]*(\\.[0-9_]+)?)|[0_]\\b", - relevance:0},{ - begin:"(\\/\\/|"+e.RE_STARTERS_RE+"|\\b(split|return|print|reverse|grep)\\b)\\s*", - keywords:"split return print reverse grep",relevance:0, - contains:[e.HASH_COMMENT_MODE,{className:"regexp",variants:[{ - begin:c("s|tr|y",n.either(...l,{capture:!0}))},{begin:c("s|tr|y","\\(","\\)")},{ - begin:c("s|tr|y","\\[","\\]")},{begin:c("s|tr|y","\\{","\\}")}],relevance:2},{ - className:"regexp",variants:[{begin:/(m|qr)\/\//,relevance:0},{ - begin:d("(?:m|qr)?",/\//,/\//)},{begin:d("m|qr",n.either(...l,{capture:!0 - }),/\1/)},{begin:d("m|qr",/\(/,/\)/)},{begin:d("m|qr",/\[/,/\]/)},{ - begin:d("m|qr",/\{/,/\}/)}]}]},{className:"function",beginKeywords:"sub", - 
end:"(\\s*\\(.*?\\))?[;{]",excludeEnd:!0,relevance:5,contains:[e.TITLE_MODE]},{ - begin:"-\\w\\b",relevance:0},{begin:"^__DATA__$",end:"^__END__$", - subLanguage:"mojolicious",contains:[{begin:"^@@.*",end:"$",className:"comment"}] - }];return i.contains=g,r.contains=g,{name:"Perl",aliases:["pl","pm"],keywords:a, - contains:g}},grmr_php:e=>{ - const n=e.regex,t=/(?![A-Za-z0-9])(?![$])/,a=n.concat(/[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*/,t),i=n.concat(/(\\?[A-Z][a-z0-9_\x7f-\xff]+|\\?[A-Z]+(?=[A-Z][a-z0-9_\x7f-\xff])){1,}/,t),r={ - scope:"variable",match:"\\$+"+a},s={scope:"subst",variants:[{begin:/\$\w+/},{ - begin:/\{\$/,end:/\}/}]},o=e.inherit(e.APOS_STRING_MODE,{illegal:null - }),l="[ \t\n]",c={scope:"string",variants:[e.inherit(e.QUOTE_STRING_MODE,{ - illegal:null,contains:e.QUOTE_STRING_MODE.contains.concat(s)}),o,{ - begin:/<<<[ \t]*(?:(\w+)|"(\w+)")\n/,end:/[ \t]*(\w+)\b/, - contains:e.QUOTE_STRING_MODE.contains.concat(s),"on:begin":(e,n)=>{ - n.data._beginMatch=e[1]||e[2]},"on:end":(e,n)=>{ - n.data._beginMatch!==e[1]&&n.ignoreMatch()}},e.END_SAME_AS_BEGIN({ - begin:/<<<[ \t]*'(\w+)'\n/,end:/[ \t]*(\w+)\b/})]},d={scope:"number",variants:[{ - begin:"\\b0[bB][01]+(?:_[01]+)*\\b"},{begin:"\\b0[oO][0-7]+(?:_[0-7]+)*\\b"},{ - begin:"\\b0[xX][\\da-fA-F]+(?:_[\\da-fA-F]+)*\\b"},{ - begin:"(?:\\b\\d+(?:_\\d+)*(\\.(?:\\d+(?:_\\d+)*))?|\\B\\.\\d+)(?:[eE][+-]?\\d+)?" - }],relevance:0 - },g=["false","null","true"],u=["__CLASS__","__DIR__","__FILE__","__FUNCTION__","__COMPILER_HALT_OFFSET__","__LINE__","__METHOD__","__NAMESPACE__","__TRAIT__","die","echo","exit","include","include_once","print","require","require_once","array","abstract","and","as","binary","bool","boolean","break","callable","case","catch","class","clone","const","continue","declare","default","do","double","else","elseif","empty","enddeclare","endfor","endforeach","endif","endswitch","endwhile","enum","eval","extends","final","finally","float","for","foreach","from","global","goto","if","implements","instanceof","insteadof","int","integer","interface","isset","iterable","list","match|0","mixed","new","never","object","or","private","protected","public","readonly","real","return","string","switch","throw","trait","try","unset","use","var","void","while","xor","yield"],b=["Error|0","AppendIterator","ArgumentCountError","ArithmeticError","ArrayIterator","ArrayObject","AssertionError","BadFunctionCallException","BadMethodCallException","CachingIterator","CallbackFilterIterator","CompileError","Countable","DirectoryIterator","DivisionByZeroError","DomainException","EmptyIterator","ErrorException","Exception","FilesystemIterator","FilterIterator","GlobIterator","InfiniteIterator","InvalidArgumentException","IteratorIterator","LengthException","LimitIterator","LogicException","MultipleIterator","NoRewindIterator","OutOfBoundsException","OutOfRangeException","OuterIterator","OverflowException","ParentIterator","ParseError","RangeException","RecursiveArrayIterator","RecursiveCachingIterator","RecursiveCallbackFilterIterator","RecursiveDirectoryIterator","RecursiveFilterIterator","RecursiveIterator","RecursiveIteratorIterator","RecursiveRegexIterator","RecursiveTreeIterator","RegexIterator","RuntimeException","SeekableIterator","SplDoublyLinkedList","SplFileInfo","SplFileObject","SplFixedArray","SplHeap","SplMaxHeap","SplMinHeap","SplObjectStorage","SplObserver","SplPriorityQueue","SplQueue","SplStack","SplSubject","SplTempFileObject","TypeError","UnderflowException","UnexpectedValueException","UnhandledMatchError","ArrayAccess","Backe
dEnum","Closure","Fiber","Generator","Iterator","IteratorAggregate","Serializable","Stringable","Throwable","Traversable","UnitEnum","WeakReference","WeakMap","Directory","__PHP_Incomplete_Class","parent","php_user_filter","self","static","stdClass"],m={ - keyword:u,literal:(e=>{const n=[];return e.forEach((e=>{ - n.push(e),e.toLowerCase()===e?n.push(e.toUpperCase()):n.push(e.toLowerCase()) - })),n})(g),built_in:b},p=e=>e.map((e=>e.replace(/\|\d+$/,""))),_={variants:[{ - match:[/new/,n.concat(l,"+"),n.concat("(?!",p(b).join("\\b|"),"\\b)"),i],scope:{ - 1:"keyword",4:"title.class"}}]},h=n.concat(a,"\\b(?!\\()"),f={variants:[{ - match:[n.concat(/::/,n.lookahead(/(?!class\b)/)),h],scope:{2:"variable.constant" - }},{match:[/::/,/class/],scope:{2:"variable.language"}},{ - match:[i,n.concat(/::/,n.lookahead(/(?!class\b)/)),h],scope:{1:"title.class", - 3:"variable.constant"}},{match:[i,n.concat("::",n.lookahead(/(?!class\b)/))], - scope:{1:"title.class"}},{match:[i,/::/,/class/],scope:{1:"title.class", - 3:"variable.language"}}]},E={scope:"attr", - match:n.concat(a,n.lookahead(":"),n.lookahead(/(?!::)/))},y={relevance:0, - begin:/\(/,end:/\)/,keywords:m,contains:[E,r,f,e.C_BLOCK_COMMENT_MODE,c,d,_] - },N={relevance:0, - match:[/\b/,n.concat("(?!fn\\b|function\\b|",p(u).join("\\b|"),"|",p(b).join("\\b|"),"\\b)"),a,n.concat(l,"*"),n.lookahead(/(?=\()/)], - scope:{3:"title.function.invoke"},contains:[y]};y.contains.push(N) - ;const w=[E,f,e.C_BLOCK_COMMENT_MODE,c,d,_];return{case_insensitive:!1, - keywords:m,contains:[{begin:n.concat(/#\[\s*/,i),beginScope:"meta",end:/]/, - endScope:"meta",keywords:{literal:g,keyword:["new","array"]},contains:[{ - begin:/\[/,end:/]/,keywords:{literal:g,keyword:["new","array"]}, - contains:["self",...w]},...w,{scope:"meta",match:i}] - },e.HASH_COMMENT_MODE,e.COMMENT("//","$"),e.COMMENT("/\\*","\\*/",{contains:[{ - scope:"doctag",match:"@[A-Za-z]+"}]}),{match:/__halt_compiler\(\);/, - keywords:"__halt_compiler",starts:{scope:"comment",end:e.MATCH_NOTHING_RE, - contains:[{match:/\?>/,scope:"meta",endsParent:!0}]}},{scope:"meta",variants:[{ - begin:/<\?php/,relevance:10},{begin:/<\?=/},{begin:/<\?/,relevance:.1},{ - begin:/\?>/}]},{scope:"variable.language",match:/\$this\b/},r,N,f,{ - match:[/const/,/\s/,a],scope:{1:"keyword",3:"variable.constant"}},_,{ - scope:"function",relevance:0,beginKeywords:"fn function",end:/[;{]/, - excludeEnd:!0,illegal:"[$%\\[]",contains:[{beginKeywords:"use" - },e.UNDERSCORE_TITLE_MODE,{begin:"=>",endsParent:!0},{scope:"params", - begin:"\\(",end:"\\)",excludeBegin:!0,excludeEnd:!0,keywords:m, - contains:["self",r,f,e.C_BLOCK_COMMENT_MODE,c,d]}]},{scope:"class",variants:[{ - beginKeywords:"enum",illegal:/[($"]/},{beginKeywords:"class interface trait", - illegal:/[:($"]/}],relevance:0,end:/\{/,excludeEnd:!0,contains:[{ - beginKeywords:"extends implements"},e.UNDERSCORE_TITLE_MODE]},{ - beginKeywords:"namespace",relevance:0,end:";",illegal:/[.']/, - contains:[e.inherit(e.UNDERSCORE_TITLE_MODE,{scope:"title.class"})]},{ - beginKeywords:"use",relevance:0,end:";",contains:[{ - match:/\b(as|const|function)\b/,scope:"keyword"},e.UNDERSCORE_TITLE_MODE]},c,d]} - },grmr_php_template:e=>({name:"PHP template",subLanguage:"xml",contains:[{ - begin:/<\?(php|=)?/,end:/\?>/,subLanguage:"php",contains:[{begin:"/\\*", - end:"\\*/",skip:!0},{begin:'b"',end:'"',skip:!0},{begin:"b'",end:"'",skip:!0 - },e.inherit(e.APOS_STRING_MODE,{illegal:null,className:null,contains:null, - skip:!0}),e.inherit(e.QUOTE_STRING_MODE,{illegal:null,className:null, - 
contains:null,skip:!0})]}]}),grmr_plaintext:e=>({name:"Plain text", - aliases:["text","txt"],disableAutodetect:!0}),grmr_python:e=>{ - const n=e.regex,t=/[\p{XID_Start}_]\p{XID_Continue}*/u,a=["and","as","assert","async","await","break","case","class","continue","def","del","elif","else","except","finally","for","from","global","if","import","in","is","lambda","match","nonlocal|10","not","or","pass","raise","return","try","while","with","yield"],i={ - $pattern:/[A-Za-z]\w+|__\w+__/,keyword:a, - built_in:["__import__","abs","all","any","ascii","bin","bool","breakpoint","bytearray","bytes","callable","chr","classmethod","compile","complex","delattr","dict","dir","divmod","enumerate","eval","exec","filter","float","format","frozenset","getattr","globals","hasattr","hash","help","hex","id","input","int","isinstance","issubclass","iter","len","list","locals","map","max","memoryview","min","next","object","oct","open","ord","pow","print","property","range","repr","reversed","round","set","setattr","slice","sorted","staticmethod","str","sum","super","tuple","type","vars","zip"], - literal:["__debug__","Ellipsis","False","None","NotImplemented","True"], - type:["Any","Callable","Coroutine","Dict","List","Literal","Generic","Optional","Sequence","Set","Tuple","Type","Union"] - },r={className:"meta",begin:/^(>>>|\.\.\.) /},s={className:"subst",begin:/\{/, - end:/\}/,keywords:i,illegal:/#/},o={begin:/\{\{/,relevance:0},l={ - className:"string",contains:[e.BACKSLASH_ESCAPE],variants:[{ - begin:/([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/,end:/'''/, - contains:[e.BACKSLASH_ESCAPE,r],relevance:10},{ - begin:/([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/,end:/"""/, - contains:[e.BACKSLASH_ESCAPE,r],relevance:10},{ - begin:/([fF][rR]|[rR][fF]|[fF])'''/,end:/'''/, - contains:[e.BACKSLASH_ESCAPE,r,o,s]},{begin:/([fF][rR]|[rR][fF]|[fF])"""/, - end:/"""/,contains:[e.BACKSLASH_ESCAPE,r,o,s]},{begin:/([uU]|[rR])'/,end:/'/, - relevance:10},{begin:/([uU]|[rR])"/,end:/"/,relevance:10},{ - begin:/([bB]|[bB][rR]|[rR][bB])'/,end:/'/},{begin:/([bB]|[bB][rR]|[rR][bB])"/, - end:/"/},{begin:/([fF][rR]|[rR][fF]|[fF])'/,end:/'/, - contains:[e.BACKSLASH_ESCAPE,o,s]},{begin:/([fF][rR]|[rR][fF]|[fF])"/,end:/"/, - contains:[e.BACKSLASH_ESCAPE,o,s]},e.APOS_STRING_MODE,e.QUOTE_STRING_MODE] - },c="[0-9](_?[0-9])*",d=`(\\b(${c}))?\\.(${c})|\\b(${c})\\.`,g="\\b|"+a.join("|"),u={ - className:"number",relevance:0,variants:[{ - begin:`(\\b(${c})|(${d}))[eE][+-]?(${c})[jJ]?(?=${g})`},{begin:`(${d})[jJ]?`},{ - begin:`\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${g})`},{ - begin:`\\b0[bB](_?[01])+[lL]?(?=${g})`},{begin:`\\b0[oO](_?[0-7])+[lL]?(?=${g})` - },{begin:`\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${g})`},{begin:`\\b(${c})[jJ](?=${g})` - }]},b={className:"comment",begin:n.lookahead(/# type:/),end:/$/,keywords:i, - contains:[{begin:/# type:/},{begin:/#/,end:/\b\B/,endsWithParent:!0}]},m={ - className:"params",variants:[{className:"",begin:/\(\s*\)/,skip:!0},{begin:/\(/, - end:/\)/,excludeBegin:!0,excludeEnd:!0,keywords:i, - contains:["self",r,u,l,e.HASH_COMMENT_MODE]}]};return s.contains=[l,u,r],{ - name:"Python",aliases:["py","gyp","ipython"],unicodeRegex:!0,keywords:i, - illegal:/(<\/|\?)|=>/,contains:[r,u,{begin:/\bself\b/},{beginKeywords:"if", - relevance:0},l,b,e.HASH_COMMENT_MODE,{match:[/\bdef/,/\s+/,t],scope:{ - 1:"keyword",3:"title.function"},contains:[m]},{variants:[{ - match:[/\bclass/,/\s+/,t,/\s*/,/\(\s*/,t,/\s*\)/]},{match:[/\bclass/,/\s+/,t]}], - scope:{1:"keyword",3:"title.class",6:"title.class.inherited"}},{ - className:"meta",begin:/^[\t 
]*@/,end:/(?=#)|$/,contains:[u,m,l]}]}}, - grmr_python_repl:e=>({aliases:["pycon"],contains:[{className:"meta.prompt", - starts:{end:/ |$/,starts:{end:"$",subLanguage:"python"}},variants:[{ - begin:/^>>>(?=[ ]|$)/},{begin:/^\.\.\.(?=[ ]|$)/}]}]}),grmr_r:e=>{ - const n=e.regex,t=/(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/,a=n.either(/0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,/0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,/(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/),i=/[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/,r=n.either(/[()]/,/[{}]/,/\[\[/,/[[\]]/,/\\/,/,/) - ;return{name:"R",keywords:{$pattern:t, - keyword:"function if in break next repeat else for while", - literal:"NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 NA_character_|10 NA_complex_|10", - built_in:"LETTERS letters month.abb month.name pi T F abs acos acosh all any anyNA Arg as.call as.character as.complex as.double as.environment as.integer as.logical as.null.default as.numeric as.raw asin asinh atan atanh attr attributes baseenv browser c call ceiling class Conj cos cosh cospi cummax cummin cumprod cumsum digamma dim dimnames emptyenv exp expression floor forceAndCall gamma gc.time globalenv Im interactive invisible is.array is.atomic is.call is.character is.complex is.double is.environment is.expression is.finite is.function is.infinite is.integer is.language is.list is.logical is.matrix is.na is.name is.nan is.null is.numeric is.object is.pairlist is.raw is.recursive is.single is.symbol lazyLoadDBfetch length lgamma list log max min missing Mod names nargs nzchar oldClass on.exit pos.to.env proc.time prod quote range Re rep retracemem return round seq_along seq_len seq.int sign signif sin sinh sinpi sqrt standardGeneric substitute sum switch tan tanh tanpi tracemem trigamma trunc unclass untracemem UseMethod xtfrm" - },contains:[e.COMMENT(/#'/,/$/,{contains:[{scope:"doctag",match:/@examples/, - starts:{end:n.lookahead(n.either(/\n^#'\s*(?=@[a-zA-Z]+)/,/\n^(?!#')/)), - endsParent:!0}},{scope:"doctag",begin:"@param",end:/$/,contains:[{ - scope:"variable",variants:[{match:t},{match:/`(?:\\.|[^`\\])+`/}],endsParent:!0 - }]},{scope:"doctag",match:/@[a-zA-Z]+/},{scope:"keyword",match:/\\[a-zA-Z]+/}] - }),e.HASH_COMMENT_MODE,{scope:"string",contains:[e.BACKSLASH_ESCAPE], - variants:[e.END_SAME_AS_BEGIN({begin:/[rR]"(-*)\(/,end:/\)(-*)"/ - }),e.END_SAME_AS_BEGIN({begin:/[rR]"(-*)\{/,end:/\}(-*)"/ - }),e.END_SAME_AS_BEGIN({begin:/[rR]"(-*)\[/,end:/\](-*)"/ - }),e.END_SAME_AS_BEGIN({begin:/[rR]'(-*)\(/,end:/\)(-*)'/ - }),e.END_SAME_AS_BEGIN({begin:/[rR]'(-*)\{/,end:/\}(-*)'/ - }),e.END_SAME_AS_BEGIN({begin:/[rR]'(-*)\[/,end:/\](-*)'/}),{begin:'"',end:'"', - relevance:0},{begin:"'",end:"'",relevance:0}]},{relevance:0,variants:[{scope:{ - 1:"operator",2:"number"},match:[i,a]},{scope:{1:"operator",2:"number"}, - match:[/%[^%]*%/,a]},{scope:{1:"punctuation",2:"number"},match:[r,a]},{scope:{ - 2:"number"},match:[/[^a-zA-Z0-9._]|^/,a]}]},{scope:{3:"operator"}, - match:[t,/\s+/,/<-/,/\s+/]},{scope:"operator",relevance:0,variants:[{match:i},{ - match:/%[^%]*%/}]},{scope:"punctuation",relevance:0,match:r},{begin:"`",end:"`", - contains:[{begin:/\\./}]}]}},grmr_ruby:e=>{ - const n=e.regex,t="([a-zA-Z_]\\w*[!?=]?|[-+~]@|<<|>>|=~|===?|<=>|[<>]=?|\\*\\*|[-/+%^&*~`|]|\\[\\]=?)",a=n.either(/\b([A-Z]+[a-z0-9]+)+/,/\b([A-Z]+[a-z0-9]+)+[A-Z]+/),i=n.concat(a,/(::\w+)*/),r={ - "variable.constant":["__FILE__","__LINE__","__ENCODING__"], - "variable.language":["self","super"], - 
keyword:["alias","and","begin","BEGIN","break","case","class","defined","do","else","elsif","end","END","ensure","for","if","in","module","next","not","or","redo","require","rescue","retry","return","then","undef","unless","until","when","while","yield","include","extend","prepend","public","private","protected","raise","throw"], - built_in:["proc","lambda","attr_accessor","attr_reader","attr_writer","define_method","private_constant","module_function"], - literal:["true","false","nil"]},s={className:"doctag",begin:"@[A-Za-z]+"},o={ - begin:"#<",end:">"},l=[e.COMMENT("#","$",{contains:[s] - }),e.COMMENT("^=begin","^=end",{contains:[s],relevance:10 - }),e.COMMENT("^__END__",e.MATCH_NOTHING_RE)],c={className:"subst",begin:/#\{/, - end:/\}/,keywords:r},d={className:"string",contains:[e.BACKSLASH_ESCAPE,c], - variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/`/,end:/`/},{ - begin:/%[qQwWx]?\(/,end:/\)/},{begin:/%[qQwWx]?\[/,end:/\]/},{ - begin:/%[qQwWx]?\{/,end:/\}/},{begin:/%[qQwWx]?/},{begin:/%[qQwWx]?\//, - end:/\//},{begin:/%[qQwWx]?%/,end:/%/},{begin:/%[qQwWx]?-/,end:/-/},{ - begin:/%[qQwWx]?\|/,end:/\|/},{begin:/\B\?(\\\d{1,3})/},{ - begin:/\B\?(\\x[A-Fa-f0-9]{1,2})/},{begin:/\B\?(\\u\{?[A-Fa-f0-9]{1,6}\}?)/},{ - begin:/\B\?(\\M-\\C-|\\M-\\c|\\c\\M-|\\M-|\\C-\\M-)[\x20-\x7e]/},{ - begin:/\B\?\\(c|C-)[\x20-\x7e]/},{begin:/\B\?\\?\S/},{ - begin:n.concat(/<<[-~]?'?/,n.lookahead(/(\w+)(?=\W)[^\n]*\n(?:[^\n]*\n)*?\s*\1\b/)), - contains:[e.END_SAME_AS_BEGIN({begin:/(\w+)/,end:/(\w+)/, - contains:[e.BACKSLASH_ESCAPE,c]})]}]},g="[0-9](_?[0-9])*",u={className:"number", - relevance:0,variants:[{ - begin:`\\b([1-9](_?[0-9])*|0)(\\.(${g}))?([eE][+-]?(${g})|r)?i?\\b`},{ - begin:"\\b0[dD][0-9](_?[0-9])*r?i?\\b"},{begin:"\\b0[bB][0-1](_?[0-1])*r?i?\\b" - },{begin:"\\b0[oO][0-7](_?[0-7])*r?i?\\b"},{ - begin:"\\b0[xX][0-9a-fA-F](_?[0-9a-fA-F])*r?i?\\b"},{ - begin:"\\b0(_?[0-7])+r?i?\\b"}]},b={variants:[{match:/\(\)/},{ - className:"params",begin:/\(/,end:/(?=\))/,excludeBegin:!0,endsParent:!0, - keywords:r}]},m=[d,{variants:[{match:[/class\s+/,i,/\s+<\s+/,i]},{ - match:[/\b(class|module)\s+/,i]}],scope:{2:"title.class", - 4:"title.class.inherited"},keywords:r},{match:[/(include|extend)\s+/,i],scope:{ - 2:"title.class"},keywords:r},{relevance:0,match:[i,/\.new[. 
(]/],scope:{ - 1:"title.class"}},{relevance:0,match:/\b[A-Z][A-Z_0-9]+\b/, - className:"variable.constant"},{relevance:0,match:a,scope:"title.class"},{ - match:[/def/,/\s+/,t],scope:{1:"keyword",3:"title.function"},contains:[b]},{ - begin:e.IDENT_RE+"::"},{className:"symbol", - begin:e.UNDERSCORE_IDENT_RE+"(!|\\?)?:",relevance:0},{className:"symbol", - begin:":(?!\\s)",contains:[d,{begin:t}],relevance:0},u,{className:"variable", - begin:"(\\$\\W)|((\\$|@@?)(\\w+))(?=[^@$?])(?![A-Za-z])(?![@$?'])"},{ - className:"params",begin:/\|/,end:/\|/,excludeBegin:!0,excludeEnd:!0, - relevance:0,keywords:r},{begin:"("+e.RE_STARTERS_RE+"|unless)\\s*", - keywords:"unless",contains:[{className:"regexp",contains:[e.BACKSLASH_ESCAPE,c], - illegal:/\n/,variants:[{begin:"/",end:"/[a-z]*"},{begin:/%r\{/,end:/\}[a-z]*/},{ - begin:"%r\\(",end:"\\)[a-z]*"},{begin:"%r!",end:"![a-z]*"},{begin:"%r\\[", - end:"\\][a-z]*"}]}].concat(o,l),relevance:0}].concat(o,l) - ;c.contains=m,b.contains=m;const p=[{begin:/^\s*=>/,starts:{end:"$",contains:m} - },{className:"meta.prompt", - begin:"^([>?]>|[\\w#]+\\(\\w+\\):\\d+:\\d+[>*]|(\\w+-)?\\d+\\.\\d+\\.\\d+(p\\d+)?[^\\d][^>]+>)(?=[ ])", - starts:{end:"$",keywords:r,contains:m}}];return l.unshift(o),{name:"Ruby", - aliases:["rb","gemspec","podspec","thor","irb"],keywords:r,illegal:/\/\*/, - contains:[e.SHEBANG({binary:"ruby"})].concat(p).concat(l).concat(m)}}, - grmr_rust:e=>{const n=e.regex,t={className:"title.function.invoke",relevance:0, - begin:n.concat(/\b/,/(?!let|for|while|if|else|match\b)/,e.IDENT_RE,n.lookahead(/\s*\(/)) - },a="([ui](8|16|32|64|128|size)|f(32|64))?",i=["drop ","Copy","Send","Sized","Sync","Drop","Fn","FnMut","FnOnce","ToOwned","Clone","Debug","PartialEq","PartialOrd","Eq","Ord","AsRef","AsMut","Into","From","Default","Iterator","Extend","IntoIterator","DoubleEndedIterator","ExactSizeIterator","SliceConcatExt","ToString","assert!","assert_eq!","bitflags!","bytes!","cfg!","col!","concat!","concat_idents!","debug_assert!","debug_assert_eq!","env!","eprintln!","panic!","file!","format!","format_args!","include_bytes!","include_str!","line!","local_data_key!","module_path!","option_env!","print!","println!","select!","stringify!","try!","unimplemented!","unreachable!","vec!","write!","writeln!","macro_rules!","assert_ne!","debug_assert_ne!"],r=["i8","i16","i32","i64","i128","isize","u8","u16","u32","u64","u128","usize","f32","f64","str","char","bool","Box","Option","Result","String","Vec"] - ;return{name:"Rust",aliases:["rs"],keywords:{$pattern:e.IDENT_RE+"!?",type:r, - keyword:["abstract","as","async","await","become","box","break","const","continue","crate","do","dyn","else","enum","extern","false","final","fn","for","if","impl","in","let","loop","macro","match","mod","move","mut","override","priv","pub","ref","return","self","Self","static","struct","super","trait","true","try","type","typeof","unsafe","unsized","use","virtual","where","while","yield"], - literal:["true","false","Some","None","Ok","Err"],built_in:i},illegal:""},t]}}, - grmr_scss:e=>{const n=ie(e),t=le,a=oe,i="@[a-z-]+",r={className:"variable", - begin:"(\\$[a-zA-Z-][a-zA-Z0-9_-]*)\\b",relevance:0};return{name:"SCSS", - case_insensitive:!0,illegal:"[=/|']", - contains:[e.C_LINE_COMMENT_MODE,e.C_BLOCK_COMMENT_MODE,n.CSS_NUMBER_MODE,{ - className:"selector-id",begin:"#[A-Za-z0-9_-]+",relevance:0},{ - className:"selector-class",begin:"\\.[A-Za-z0-9_-]+",relevance:0 - },n.ATTRIBUTE_SELECTOR_MODE,{className:"selector-tag", - 
begin:"\\b("+re.join("|")+")\\b",relevance:0},{className:"selector-pseudo", - begin:":("+a.join("|")+")"},{className:"selector-pseudo", - begin:":(:)?("+t.join("|")+")"},r,{begin:/\(/,end:/\)/, - contains:[n.CSS_NUMBER_MODE]},n.CSS_VARIABLE,{className:"attribute", - begin:"\\b("+ce.join("|")+")\\b"},{ - begin:"\\b(whitespace|wait|w-resize|visible|vertical-text|vertical-ideographic|uppercase|upper-roman|upper-alpha|underline|transparent|top|thin|thick|text|text-top|text-bottom|tb-rl|table-header-group|table-footer-group|sw-resize|super|strict|static|square|solid|small-caps|separate|se-resize|scroll|s-resize|rtl|row-resize|ridge|right|repeat|repeat-y|repeat-x|relative|progress|pointer|overline|outside|outset|oblique|nowrap|not-allowed|normal|none|nw-resize|no-repeat|no-drop|newspaper|ne-resize|n-resize|move|middle|medium|ltr|lr-tb|lowercase|lower-roman|lower-alpha|loose|list-item|line|line-through|line-edge|lighter|left|keep-all|justify|italic|inter-word|inter-ideograph|inside|inset|inline|inline-block|inherit|inactive|ideograph-space|ideograph-parenthesis|ideograph-numeric|ideograph-alpha|horizontal|hidden|help|hand|groove|fixed|ellipsis|e-resize|double|dotted|distribute|distribute-space|distribute-letter|distribute-all-lines|disc|disabled|default|decimal|dashed|crosshair|collapse|col-resize|circle|char|center|capitalize|break-word|break-all|bottom|both|bolder|bold|block|bidi-override|below|baseline|auto|always|all-scroll|absolute|table|table-cell)\\b" - },{begin:/:/,end:/[;}{]/,relevance:0, - contains:[n.BLOCK_COMMENT,r,n.HEXCOLOR,n.CSS_NUMBER_MODE,e.QUOTE_STRING_MODE,e.APOS_STRING_MODE,n.IMPORTANT,n.FUNCTION_DISPATCH] - },{begin:"@(page|font-face)",keywords:{$pattern:i,keyword:"@page @font-face"}},{ - begin:"@",end:"[{;]",returnBegin:!0,keywords:{$pattern:/[a-z-]+/, - keyword:"and or not only",attribute:se.join(" ")},contains:[{begin:i, - className:"keyword"},{begin:/[a-z-]+(?=:)/,className:"attribute" - },r,e.QUOTE_STRING_MODE,e.APOS_STRING_MODE,n.HEXCOLOR,n.CSS_NUMBER_MODE] - },n.FUNCTION_DISPATCH]}},grmr_shell:e=>({name:"Shell Session", - aliases:["console","shellsession"],contains:[{className:"meta.prompt", - begin:/^\s{0,3}[/~\w\d[\]()@-]*[>%$#][ ]?/,starts:{end:/[^\\](?=\s*$)/, - subLanguage:"bash"}}]}),grmr_sql:e=>{ - const n=e.regex,t=e.COMMENT("--","$"),a=["true","false","unknown"],i=["bigint","binary","blob","boolean","char","character","clob","date","dec","decfloat","decimal","float","int","integer","interval","nchar","nclob","national","numeric","real","row","smallint","time","timestamp","varchar","varying","varbinary"],r=["abs","acos","array_agg","asin","atan","avg","cast","ceil","ceiling","coalesce","corr","cos","cosh","count","covar_pop","covar_samp","cume_dist","dense_rank","deref","element","exp","extract","first_value","floor","json_array","json_arrayagg","json_exists","json_object","json_objectagg","json_query","json_table","json_table_primitive","json_value","lag","last_value","lead","listagg","ln","log","log10","lower","max","min","mod","nth_value","ntile","nullif","percent_rank","percentile_cont","percentile_disc","position","position_regex","power","rank","regr_avgx","regr_avgy","regr_count","regr_intercept","regr_r2","regr_slope","regr_sxx","regr_sxy","regr_syy","row_number","sin","sinh","sqrt","stddev_pop","stddev_samp","substring","substring_regex","sum","tan","tanh","translate","translate_regex","treat","trim","trim_array","unnest","upper","value_of","var_pop","var_samp","width_bucket"],s=["create table","insert into","primary key","foreign key","not 
null","alter table","add constraint","grouping sets","on overflow","character set","respect nulls","ignore nulls","nulls first","nulls last","depth first","breadth first"],o=r,l=["abs","acos","all","allocate","alter","and","any","are","array","array_agg","array_max_cardinality","as","asensitive","asin","asymmetric","at","atan","atomic","authorization","avg","begin","begin_frame","begin_partition","between","bigint","binary","blob","boolean","both","by","call","called","cardinality","cascaded","case","cast","ceil","ceiling","char","char_length","character","character_length","check","classifier","clob","close","coalesce","collate","collect","column","commit","condition","connect","constraint","contains","convert","copy","corr","corresponding","cos","cosh","count","covar_pop","covar_samp","create","cross","cube","cume_dist","current","current_catalog","current_date","current_default_transform_group","current_path","current_role","current_row","current_schema","current_time","current_timestamp","current_path","current_role","current_transform_group_for_type","current_user","cursor","cycle","date","day","deallocate","dec","decimal","decfloat","declare","default","define","delete","dense_rank","deref","describe","deterministic","disconnect","distinct","double","drop","dynamic","each","element","else","empty","end","end_frame","end_partition","end-exec","equals","escape","every","except","exec","execute","exists","exp","external","extract","false","fetch","filter","first_value","float","floor","for","foreign","frame_row","free","from","full","function","fusion","get","global","grant","group","grouping","groups","having","hold","hour","identity","in","indicator","initial","inner","inout","insensitive","insert","int","integer","intersect","intersection","interval","into","is","join","json_array","json_arrayagg","json_exists","json_object","json_objectagg","json_query","json_table","json_table_primitive","json_value","lag","language","large","last_value","lateral","lead","leading","left","like","like_regex","listagg","ln","local","localtime","localtimestamp","log","log10","lower","match","match_number","match_recognize","matches","max","member","merge","method","min","minute","mod","modifies","module","month","multiset","national","natural","nchar","nclob","new","no","none","normalize","not","nth_value","ntile","null","nullif","numeric","octet_length","occurrences_regex","of","offset","old","omit","on","one","only","open","or","order","out","outer","over","overlaps","overlay","parameter","partition","pattern","per","percent","percent_rank","percentile_cont","percentile_disc","period","portion","position","position_regex","power","precedes","precision","prepare","primary","procedure","ptf","range","rank","reads","real","recursive","ref","references","referencing","regr_avgx","regr_avgy","regr_count","regr_intercept","regr_r2","regr_slope","regr_sxx","regr_sxy","regr_syy","release","result","return","returns","revoke","right","rollback","rollup","row","row_number","rows","running","savepoint","scope","scroll","search","second","seek","select","sensitive","session_user","set","show","similar","sin","sinh","skip","smallint","some","specific","specifictype","sql","sqlexception","sqlstate","sqlwarning","sqrt","start","static","stddev_pop","stddev_samp","submultiset","subset","substring","substring_regex","succeeds","sum","symmetric","system","system_time","system_user","table","tablesample","tan","tanh","then","time","timestamp","timezone_hour","timezone_minute","to","trailing","translate","translate_rege
x","translation","treat","trigger","trim","trim_array","true","truncate","uescape","union","unique","unknown","unnest","update","upper","user","using","value","values","value_of","var_pop","var_samp","varbinary","varchar","varying","versioning","when","whenever","where","width_bucket","window","with","within","without","year","add","asc","collation","desc","final","first","last","view"].filter((e=>!r.includes(e))),c={ - begin:n.concat(/\b/,n.either(...o),/\s*\(/),relevance:0,keywords:{built_in:o}} - ;return{name:"SQL",case_insensitive:!0,illegal:/[{}]|<\//,keywords:{ - $pattern:/\b[\w\.]+/,keyword:((e,{exceptions:n,when:t}={})=>{const a=t - ;return n=n||[],e.map((e=>e.match(/\|\d+$/)||n.includes(e)?e:a(e)?e+"|0":e)) - })(l,{when:e=>e.length<3}),literal:a,type:i, - built_in:["current_catalog","current_date","current_default_transform_group","current_path","current_role","current_schema","current_transform_group_for_type","current_user","session_user","system_time","system_user","current_time","localtime","current_timestamp","localtimestamp"] - },contains:[{begin:n.either(...s),relevance:0,keywords:{$pattern:/[\w\.]+/, - keyword:l.concat(s),literal:a,type:i}},{className:"type", - begin:n.either("double precision","large object","with timezone","without timezone") - },c,{className:"variable",begin:/@[a-z0-9][a-z0-9_]*/},{className:"string", - variants:[{begin:/'/,end:/'/,contains:[{begin:/''/}]}]},{begin:/"/,end:/"/, - contains:[{begin:/""/}]},e.C_NUMBER_MODE,e.C_BLOCK_COMMENT_MODE,t,{ - className:"operator",begin:/[-+*/=%^~]|&&?|\|\|?|!=?|<(?:=>?|<|>)?|>[>=]?/, - relevance:0}]}},grmr_swift:e=>{const n={match:/\s+/,relevance:0 - },t=e.COMMENT("/\\*","\\*/",{contains:["self"]}),a=[e.C_LINE_COMMENT_MODE,t],i={ - match:[/\./,m(...xe,...Me)],className:{2:"keyword"}},r={match:b(/\./,m(...Ae)), - relevance:0},s=Ae.filter((e=>"string"==typeof e)).concat(["_|0"]),o={variants:[{ - className:"keyword", - match:m(...Ae.filter((e=>"string"!=typeof e)).concat(Se).map(ke),...Me)}]},l={ - $pattern:m(/\b\w+/,/#\w+/),keyword:s.concat(Re),literal:Ce},c=[i,r,o],g=[{ - match:b(/\./,m(...De)),relevance:0},{className:"built_in", - match:b(/\b/,m(...De),/(?=\()/)}],u={match:/->/,relevance:0},p=[u,{ - className:"operator",relevance:0,variants:[{match:Be},{match:`\\.(\\.|${Le})+`}] - }],_="([0-9]_*)+",h="([0-9a-fA-F]_*)+",f={className:"number",relevance:0, - variants:[{match:`\\b(${_})(\\.(${_}))?([eE][+-]?(${_}))?\\b`},{ - match:`\\b0x(${h})(\\.(${h}))?([pP][+-]?(${_}))?\\b`},{match:/\b0o([0-7]_*)+\b/ - },{match:/\b0b([01]_*)+\b/}]},E=(e="")=>({className:"subst",variants:[{ - match:b(/\\/,e,/[0\\tnr"']/)},{match:b(/\\/,e,/u\{[0-9a-fA-F]{1,8}\}/)}] - }),y=(e="")=>({className:"subst",match:b(/\\/,e,/[\t ]*(?:[\r\n]|\r\n)/) - }),N=(e="")=>({className:"subst",label:"interpol",begin:b(/\\/,e,/\(/),end:/\)/ - }),w=(e="")=>({begin:b(e,/"""/),end:b(/"""/,e),contains:[E(e),y(e),N(e)] - }),v=(e="")=>({begin:b(e,/"/),end:b(/"/,e),contains:[E(e),N(e)]}),O={ - className:"string", - variants:[w(),w("#"),w("##"),w("###"),v(),v("#"),v("##"),v("###")] - },k=[e.BACKSLASH_ESCAPE,{begin:/\[/,end:/\]/,relevance:0, - contains:[e.BACKSLASH_ESCAPE]}],x={begin:/\/[^\s](?=[^/\n]*\/)/,end:/\//, - contains:k},M=e=>{const n=b(e,/\//),t=b(/\//,e);return{begin:n,end:t, - contains:[...k,{scope:"comment",begin:`#(?!.*${t})`,end:/$/}]}},S={ - scope:"regexp",variants:[M("###"),M("##"),M("#"),x]},A={match:b(/`/,Fe,/`/) - },C=[A,{className:"variable",match:/\$\d+/},{className:"variable", - 
match:`\\$${ze}+`}],T=[{match:/(@|#(un)?)available/,scope:"keyword",starts:{ - contains:[{begin:/\(/,end:/\)/,keywords:Pe,contains:[...p,f,O]}]}},{ - scope:"keyword",match:b(/@/,m(...je))},{scope:"meta",match:b(/@/,Fe)}],R={ - match:d(/\b[A-Z]/),relevance:0,contains:[{className:"type", - match:b(/(AV|CA|CF|CG|CI|CL|CM|CN|CT|MK|MP|MTK|MTL|NS|SCN|SK|UI|WK|XC)/,ze,"+") - },{className:"type",match:Ue,relevance:0},{match:/[?!]+/,relevance:0},{ - match:/\.\.\./,relevance:0},{match:b(/\s+&\s+/,d(Ue)),relevance:0}]},D={ - begin://,keywords:l,contains:[...a,...c,...T,u,R]};R.contains.push(D) - ;const I={begin:/\(/,end:/\)/,relevance:0,keywords:l,contains:["self",{ - match:b(Fe,/\s*:/),keywords:"_|0",relevance:0 - },...a,S,...c,...g,...p,f,O,...C,...T,R]},L={begin://, - keywords:"repeat each",contains:[...a,R]},B={begin:/\(/,end:/\)/,keywords:l, - contains:[{begin:m(d(b(Fe,/\s*:/)),d(b(Fe,/\s+/,Fe,/\s*:/))),end:/:/, - relevance:0,contains:[{className:"keyword",match:/\b_\b/},{className:"params", - match:Fe}]},...a,...c,...p,f,O,...T,R,I],endsParent:!0,illegal:/["']/},$={ - match:[/(func|macro)/,/\s+/,m(A.match,Fe,Be)],className:{1:"keyword", - 3:"title.function"},contains:[L,B,n],illegal:[/\[/,/%/]},z={ - match:[/\b(?:subscript|init[?!]?)/,/\s*(?=[<(])/],className:{1:"keyword"}, - contains:[L,B,n],illegal:/\[|%/},F={match:[/operator/,/\s+/,Be],className:{ - 1:"keyword",3:"title"}},U={begin:[/precedencegroup/,/\s+/,Ue],className:{ - 1:"keyword",3:"title"},contains:[R],keywords:[...Te,...Ce],end:/}/} - ;for(const e of O.variants){const n=e.contains.find((e=>"interpol"===e.label)) - ;n.keywords=l;const t=[...c,...g,...p,f,O,...C];n.contains=[...t,{begin:/\(/, - end:/\)/,contains:["self",...t]}]}return{name:"Swift",keywords:l, - contains:[...a,$,z,{beginKeywords:"struct protocol class extension enum actor", - end:"\\{",excludeEnd:!0,keywords:l,contains:[e.inherit(e.TITLE_MODE,{ - className:"title.class",begin:/[A-Za-z$_][\u00C0-\u02B80-9A-Za-z$_]*/}),...c] - },F,U,{beginKeywords:"import",end:/$/,contains:[...a],relevance:0 - },S,...c,...g,...p,f,O,...C,...T,R,I]}},grmr_typescript:e=>{ - const n=Oe(e),t=_e,a=["any","void","number","boolean","string","object","never","symbol","bigint","unknown"],i={ - beginKeywords:"namespace",end:/\{/,excludeEnd:!0, - contains:[n.exports.CLASS_REFERENCE]},r={beginKeywords:"interface",end:/\{/, - excludeEnd:!0,keywords:{keyword:"interface extends",built_in:a}, - contains:[n.exports.CLASS_REFERENCE]},s={$pattern:_e, - keyword:he.concat(["type","namespace","interface","public","private","protected","implements","declare","abstract","readonly","enum","override"]), - literal:fe,built_in:ve.concat(a),"variable.language":we},o={className:"meta", - begin:"@"+t},l=(e,n,t)=>{const a=e.contains.findIndex((e=>e.label===n)) - ;if(-1===a)throw Error("can not find mode to replace");e.contains.splice(a,1,t)} - ;return Object.assign(n.keywords,s), - n.exports.PARAMS_CONTAINS.push(o),n.contains=n.contains.concat([o,i,r]), - l(n,"shebang",e.SHEBANG()),l(n,"use_strict",{className:"meta",relevance:10, - begin:/^\s*['"]use strict['"]/ - }),n.contains.find((e=>"func.def"===e.label)).relevance=0,Object.assign(n,{ - name:"TypeScript",aliases:["ts","tsx","mts","cts"]}),n},grmr_vbnet:e=>{ - const n=e.regex,t=/\d{1,2}\/\d{1,2}\/\d{4}/,a=/\d{4}-\d{1,2}-\d{1,2}/,i=/(\d|1[012])(:\d+){0,2} *(AM|PM)/,r=/\d{1,2}(:\d{1,2}){1,2}/,s={ - className:"literal",variants:[{begin:n.concat(/# */,n.either(a,t),/ *#/)},{ - begin:n.concat(/# */,r,/ *#/)},{begin:n.concat(/# */,i,/ *#/)},{ - begin:n.concat(/# 
*/,n.either(a,t),/ +/,n.either(i,r),/ *#/)}] - },o=e.COMMENT(/'''/,/$/,{contains:[{className:"doctag",begin:/<\/?/,end:/>/}] - }),l=e.COMMENT(null,/$/,{variants:[{begin:/'/},{begin:/([\t ]|^)REM(?=\s)/}]}) - ;return{name:"Visual Basic .NET",aliases:["vb"],case_insensitive:!0, - classNameAliases:{label:"symbol"},keywords:{ - keyword:"addhandler alias aggregate ansi as async assembly auto binary by byref byval call case catch class compare const continue custom declare default delegate dim distinct do each equals else elseif end enum erase error event exit explicit finally for friend from function get global goto group handles if implements imports in inherits interface into iterator join key let lib loop me mid module mustinherit mustoverride mybase myclass namespace narrowing new next notinheritable notoverridable of off on operator option optional order overloads overridable overrides paramarray partial preserve private property protected public raiseevent readonly redim removehandler resume return select set shadows shared skip static step stop structure strict sub synclock take text then throw to try unicode until using when where while widening with withevents writeonly yield", - built_in:"addressof and andalso await directcast gettype getxmlnamespace is isfalse isnot istrue like mod nameof new not or orelse trycast typeof xor cbool cbyte cchar cdate cdbl cdec cint clng cobj csbyte cshort csng cstr cuint culng cushort", - type:"boolean byte char date decimal double integer long object sbyte short single string uinteger ulong ushort", - literal:"true false nothing"}, - illegal:"//|\\{|\\}|endif|gosub|variant|wend|^\\$ ",contains:[{ - className:"string",begin:/"(""|[^/n])"C\b/},{className:"string",begin:/"/, - end:/"/,illegal:/\n/,contains:[{begin:/""/}]},s,{className:"number",relevance:0, - variants:[{begin:/\b\d[\d_]*((\.[\d_]+(E[+-]?[\d_]+)?)|(E[+-]?[\d_]+))[RFD@!#]?/ - },{begin:/\b\d[\d_]*((U?[SIL])|[%&])?/},{begin:/&H[\dA-F_]+((U?[SIL])|[%&])?/},{ - begin:/&O[0-7_]+((U?[SIL])|[%&])?/},{begin:/&B[01_]+((U?[SIL])|[%&])?/}]},{ - className:"label",begin:/^\w+:/},o,l,{className:"meta", - begin:/[\t ]*#(const|disable|else|elseif|enable|end|externalsource|if|region)\b/, - end:/$/,keywords:{ - keyword:"const disable else elseif enable end externalsource if region then"}, - contains:[l]}]}},grmr_wasm:e=>{e.regex;const n=e.COMMENT(/\(;/,/;\)/) - ;return n.contains.push("self"),{name:"WebAssembly",keywords:{$pattern:/[\w.]+/, - keyword:["anyfunc","block","br","br_if","br_table","call","call_indirect","data","drop","elem","else","end","export","func","global.get","global.set","local.get","local.set","local.tee","get_global","get_local","global","if","import","local","loop","memory","memory.grow","memory.size","module","mut","nop","offset","param","result","return","select","set_global","set_local","start","table","tee_local","then","type","unreachable"] - },contains:[e.COMMENT(/;;/,/$/),n,{match:[/(?:offset|align)/,/\s*/,/=/], - className:{1:"keyword",3:"operator"}},{className:"variable",begin:/\$[\w_]+/},{ - match:/(\((?!;)|\))+/,className:"punctuation",relevance:0},{ - begin:[/(?:func|call|call_indirect)/,/\s+/,/\$[^\s)]+/],className:{1:"keyword", - 3:"title.function"}},e.QUOTE_STRING_MODE,{match:/(i32|i64|f32|f64)(?!\.)/, - className:"type"},{className:"keyword", - 
match:/\b(f32|f64|i32|i64)(?:\.(?:abs|add|and|ceil|clz|const|convert_[su]\/i(?:32|64)|copysign|ctz|demote\/f64|div(?:_[su])?|eqz?|extend_[su]\/i32|floor|ge(?:_[su])?|gt(?:_[su])?|le(?:_[su])?|load(?:(?:8|16|32)_[su])?|lt(?:_[su])?|max|min|mul|nearest|neg?|or|popcnt|promote\/f32|reinterpret\/[fi](?:32|64)|rem_[su]|rot[lr]|shl|shr_[su]|store(?:8|16|32)?|sqrt|sub|trunc(?:_[su]\/f(?:32|64))?|wrap\/i64|xor))\b/ - },{className:"number",relevance:0, - match:/[+-]?\b(?:\d(?:_?\d)*(?:\.\d(?:_?\d)*)?(?:[eE][+-]?\d(?:_?\d)*)?|0x[\da-fA-F](?:_?[\da-fA-F])*(?:\.[\da-fA-F](?:_?[\da-fA-D])*)?(?:[pP][+-]?\d(?:_?\d)*)?)\b|\binf\b|\bnan(?::0x[\da-fA-F](?:_?[\da-fA-D])*)?\b/ - }]}},grmr_xml:e=>{ - const n=e.regex,t=n.concat(/[\p{L}_]/u,n.optional(/[\p{L}0-9_.-]*:/u),/[\p{L}0-9_.-]*/u),a={ - className:"symbol",begin:/&[a-z]+;|&#[0-9]+;|&#x[a-f0-9]+;/},i={begin:/\s/, - contains:[{className:"keyword",begin:/#?[a-z_][a-z1-9_-]+/,illegal:/\n/}] - },r=e.inherit(i,{begin:/\(/,end:/\)/}),s=e.inherit(e.APOS_STRING_MODE,{ - className:"string"}),o=e.inherit(e.QUOTE_STRING_MODE,{className:"string"}),l={ - endsWithParent:!0,illegal:/`]+/}]}]}]};return{ - name:"HTML, XML", - aliases:["html","xhtml","rss","atom","xjb","xsd","xsl","plist","wsf","svg"], - case_insensitive:!0,unicodeRegex:!0,contains:[{className:"meta",begin://,relevance:10,contains:[i,o,s,r,{begin:/\[/,end:/\]/,contains:[{ - className:"meta",begin://,contains:[i,r,o,s]}]}] - },e.COMMENT(//,{relevance:10}),{begin://, - relevance:10},a,{className:"meta",end:/\?>/,variants:[{begin:/<\?xml/, - relevance:10,contains:[o]},{begin:/<\?[a-z][a-z0-9]+/}]},{className:"tag", - begin:/)/,end:/>/,keywords:{name:"style"},contains:[l],starts:{ - end:/<\/style>/,returnEnd:!0,subLanguage:["css","xml"]}},{className:"tag", - begin:/)/,end:/>/,keywords:{name:"script"},contains:[l],starts:{ - end:/<\/script>/,returnEnd:!0,subLanguage:["javascript","handlebars","xml"]}},{ - className:"tag",begin:/<>|<\/>/},{className:"tag", - begin:n.concat(//,/>/,/\s/)))), - end:/\/?>/,contains:[{className:"name",begin:t,relevance:0,starts:l}]},{ - className:"tag",begin:n.concat(/<\//,n.lookahead(n.concat(t,/>/))),contains:[{ - className:"name",begin:t,relevance:0},{begin:/>/,relevance:0,endsParent:!0}]}]} - },grmr_yaml:e=>{ - const n="true false yes no null",t="[\\w#;/?:@&=+$,.~*'()[\\]]+",a={ - className:"string",relevance:0,variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/ - },{begin:/\S+/}],contains:[e.BACKSLASH_ESCAPE,{className:"template-variable", - variants:[{begin:/\{\{/,end:/\}\}/},{begin:/%\{/,end:/\}/}]}]},i=e.inherit(a,{ - variants:[{begin:/'/,end:/'/},{begin:/"/,end:/"/},{begin:/[^\s,{}[\]]+/}]}),r={ - end:",",endsWithParent:!0,excludeEnd:!0,keywords:n,relevance:0},s={begin:/\{/, - end:/\}/,contains:[r],illegal:"\\n",relevance:0},o={begin:"\\[",end:"\\]", - contains:[r],illegal:"\\n",relevance:0},l=[{className:"attr",variants:[{ - begin:"\\w[\\w :\\/.-]*:(?=[ \t]|$)"},{begin:'"\\w[\\w :\\/.-]*":(?=[ \t]|$)'},{ - begin:"'\\w[\\w :\\/.-]*':(?=[ \t]|$)"}]},{className:"meta",begin:"^---\\s*$", - relevance:10},{className:"string", - begin:"[\\|>]([1-9]?[+-])?[ ]*\\n( +)[^ ][^\\n]*\\n(\\2[^\\n]+\\n?)*"},{ - begin:"<%[%=-]?",end:"[%-]?%>",subLanguage:"ruby",excludeBegin:!0,excludeEnd:!0, - relevance:0},{className:"type",begin:"!\\w+!"+t},{className:"type", - begin:"!<"+t+">"},{className:"type",begin:"!"+t},{className:"type",begin:"!!"+t - },{className:"meta",begin:"&"+e.UNDERSCORE_IDENT_RE+"$"},{className:"meta", - begin:"\\*"+e.UNDERSCORE_IDENT_RE+"$"},{className:"bullet",begin:"-(?=[ 
]|$)", - relevance:0},e.HASH_COMMENT_MODE,{beginKeywords:n,keywords:{literal:n}},{ - className:"number", - begin:"\\b[0-9]{4}(-[0-9][0-9]){0,2}([Tt \\t][0-9][0-9]?(:[0-9][0-9]){2})?(\\.[0-9]*)?([ \\t])*(Z|[-+][0-9][0-9]?(:[0-9][0-9])?)?\\b" - },{className:"number",begin:e.C_NUMBER_RE+"\\b",relevance:0},s,o,a],c=[...l] - ;return c.pop(),c.push(i),r.contains=c,{name:"YAML",case_insensitive:!0, - aliases:["yml"],contains:l}}});const He=ae;for(const e of Object.keys(Ke)){ - const n=e.replace("grmr_","").replace("_","-");He.registerLanguage(n,Ke[e])} - return He}() - ;"object"==typeof exports&&"undefined"!=typeof module&&(module.exports=hljs); \ No newline at end of file diff --git a/docs/md/assets/highlight_init.js b/docs/md/assets/highlight_init.js deleted file mode 100644 index e237927..0000000 --- a/docs/md/assets/highlight_init.js +++ /dev/null @@ -1,6 +0,0 @@ -document.addEventListener('DOMContentLoaded', (event) => { - document.querySelectorAll('pre code').forEach((block) => { - hljs.highlightBlock(block); - }); - }); - \ No newline at end of file diff --git a/docs/md/assets/styles.css b/docs/md/assets/styles.css deleted file mode 100644 index f103474..0000000 --- a/docs/md/assets/styles.css +++ /dev/null @@ -1,153 +0,0 @@ -@font-face { - font-family: "Monaco"; - font-style: normal; - font-weight: normal; - src: local("Monaco"), url("Monaco.woff") format("woff"); -} - -:root { - --global-font-size: 16px; - --global-line-height: 1.5em; - --global-space: 10px; - --font-stack: Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, - Courier New, monospace, serif; - --font-stack: dm, Monaco, Courier New, monospace, serif; - --mono-font-stack: Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, - Courier New, monospace, serif; - - --background-color: #151515; /* Dark background */ - --font-color: #eaeaea; /* Light font color for contrast */ - --invert-font-color: #151515; /* Dark color for inverted elements */ - --primary-color: #1a95e0; /* Primary color can remain the same or be adjusted for better contrast */ - --secondary-color: #727578; /* Secondary color for less important text */ - --error-color: #ff5555; /* Bright color for errors */ - --progress-bar-background: #444; /* Darker background for progress bar */ - --progress-bar-fill: #1a95e0; /* Bright color for progress bar fill */ - --code-bg-color: #1e1e1e; /* Darker background for code blocks */ - --input-style: solid; /* Keeping input style solid */ - --block-background-color: #202020; /* Darker background for block elements */ - --global-font-color: #eaeaea; /* Light font color for global elements */ - - --background-color: #222225; - - --background-color: #070708; - --page-width: 70em; - --font-color: #e8e9ed; - --invert-font-color: #222225; - --secondary-color: #a3abba; - --secondary-color: #d5cec0; - --tertiary-color: #a3abba; - --primary-color: #09b5a5; /* Updated to the brand color */ - --primary-color: #50ffff; /* Updated to the brand color */ - --error-color: #ff3c74; - --progress-bar-background: #3f3f44; - --progress-bar-fill: #09b5a5; /* Updated to the brand color */ - --code-bg-color: #3f3f44; - --input-style: solid; - --display-h1-decoration: none; - - --display-h1-decoration: none; -} - -/* body { - background-color: var(--background-color); - color: var(--font-color); -} - -a { - color: var(--primary-color); -} - -a:hover { - background-color: var(--primary-color); - color: var(--invert-font-color); -} - -blockquote::after { - color: #444; -} - 
-pre, code { - background-color: var(--code-bg-color); - color: var(--font-color); -} - -.terminal-nav:first-child { - border-bottom: 1px dashed var(--secondary-color); -} */ - -.terminal-mkdocs-main-content { - line-height: var(--global-line-height); -} - -strong, -.highlight { - /* background: url(//s2.svgbox.net/pen-brushes.svg?ic=brush-1&color=50ffff); */ - background-color: #50ffff33; -} - -.terminal-card > header { - color: var(--font-color); - text-align: center; - background-color: var(--progress-bar-background); - padding: 0.3em 0.5em; -} -.btn.btn-sm { - color: var(--font-color); - padding: 0.2em 0.5em; - font-size: 0.8em; -} - -.loading-message { - display: none; - margin-top: 20px; -} - -.response-section { - display: none; - padding-top: 20px; -} - -.tabs { - display: flex; - flex-direction: column; -} -.tab-list { - display: flex; - padding: 0; - margin: 0; - list-style-type: none; - border-bottom: 1px solid var(--font-color); -} -.tab-item { - cursor: pointer; - padding: 10px; - border: 1px solid var(--font-color); - margin-right: -1px; - border-bottom: none; -} -.tab-item:hover, -.tab-item:focus, -.tab-item:active { - background-color: var(--progress-bar-background); -} -.tab-content { - display: none; - border: 1px solid var(--font-color); - border-top: none; -} -.tab-content:first-of-type { - display: block; -} - -.tab-content header { - padding: 0.5em; - display: flex; - justify-content: end; - align-items: center; - background-color: var(--progress-bar-background); -} -.tab-content pre { - margin: 0; - max-height: 300px; overflow: auto; border:none; -} \ No newline at end of file diff --git a/docs/md/changelog.md b/docs/md/changelog.md deleted file mode 100644 index 03e490f..0000000 --- a/docs/md/changelog.md +++ /dev/null @@ -1,102 +0,0 @@ -# Changelog - -## [v0.2.77] - 2024-08-04 - -Significant improvements in text processing and performance: - -- 🚀 **Dependency reduction**: Removed dependency on spaCy model for text chunk labeling in cosine extraction strategy. -- 🤖 **Transformer upgrade**: Implemented text sequence classification using a transformer model for labeling text chunks. -- ⚡ **Performance enhancement**: Improved model loading speed due to removal of spaCy dependency. -- 🔧 **Future-proofing**: Laid groundwork for potential complete removal of spaCy dependency in future versions. - -These changes address issue #68 and provide a foundation for faster, more efficient text processing in Crawl4AI. - -## [v0.2.76] - 2024-08-02 - -Major improvements in functionality, performance, and cross-platform compatibility! 🚀 - -- 🐳 **Docker enhancements**: Significantly improved Dockerfile for easy installation on Linux, Mac, and Windows. -- 🌐 **Official Docker Hub image**: Launched our first official image on Docker Hub for streamlined deployment. -- 🔧 **Selenium upgrade**: Removed dependency on ChromeDriver, now using Selenium's built-in capabilities for better compatibility. -- 🖼️ **Image description**: Implemented ability to generate textual descriptions for extracted images from web pages. -- ⚡ **Performance boost**: Various improvements to enhance overall speed and performance. - -A big shoutout to our amazing community contributors: -- [@aravindkarnam](https://github.com/aravindkarnam) for developing the textual description extraction feature. -- [@FractalMind](https://github.com/FractalMind) for creating the first official Docker Hub image and fixing Dockerfile errors. 
-- [@ketonkss4](https://github.com/ketonkss4) for identifying Selenium's new capabilities, helping us reduce dependencies. - -Your contributions are driving Crawl4AI forward! 🙌 - -## [v0.2.75] - 2024-07-19 - -Minor improvements for a more maintainable codebase: - -- 🔄 Fixed typos in `chunking_strategy.py` and `crawler_strategy.py` to improve code readability -- 🔄 Removed `.test_pads/` directory from `.gitignore` to keep our repository clean and organized - -These changes may seem small, but they contribute to a more stable and sustainable codebase. By fixing typos and updating our `.gitignore` settings, we're ensuring that our code is easier to maintain and scale in the long run. - - -## v0.2.74 - 2024-07-08 -A slew of exciting updates to improve the crawler's stability and robustness! 🎉 - -- 💻 **UTF encoding fix**: Resolved the Windows \"charmap\" error by adding UTF encoding. -- 🛡️ **Error handling**: Implemented MaxRetryError exception handling in LocalSeleniumCrawlerStrategy. -- 🧹 **Input sanitization**: Improved input sanitization and handled encoding issues in LLMExtractionStrategy. -- 🚮 **Database cleanup**: Removed existing database file and initialized a new one. - -## [v0.2.73] - 2024-07-03 - -💡 In this release, we've bumped the version to v0.2.73 and refreshed our documentation to ensure you have the best experience with our project. - -* Supporting website need "with-head" mode to crawl the website with head. -* Fixing the installation issues for setup.py and dockerfile. -* Resolve multiple issues. - -## [v0.2.72] - 2024-06-30 - -This release brings exciting updates and improvements to our project! 🎉 - -* 📚 **Documentation Updates**: Our documentation has been revamped to reflect the latest changes and additions. -* 🚀 **New Modes in setup.py**: We've added support for three new modes in setup.py: default, torch, and transformers. This enhances the project's flexibility and usability. -* 🐳 **Docker File Updates**: The Docker file has been updated to ensure seamless compatibility with the new modes and improvements. -* 🕷️ **Temporary Solution for Headless Crawling**: We've implemented a temporary solution to overcome issues with crawling websites in headless mode. - -These changes aim to improve the overall user experience, provide more flexibility, and enhance the project's performance. We're thrilled to share these updates with you and look forward to continuing to evolve and improve our project! - -## [0.2.71] - 2024-06-26 - -**Improved Error Handling and Performance** 🚧 - -* 🚫 Refactored `crawler_strategy.py` to handle exceptions and provide better error messages, making it more robust and reliable. -* 💻 Optimized the `get_content_of_website_optimized` function in `utils.py` for improved performance, reducing potential bottlenecks. -* 💻 Updated `utils.py` with the latest changes, ensuring consistency and accuracy. -* 🚫 Migrated to `ChromeDriverManager` to resolve Chrome driver download issues, providing a smoother user experience. - -These changes focus on refining the existing codebase, resulting in a more stable, efficient, and user-friendly experience. With these improvements, you can expect fewer errors and better performance in the crawler strategy and utility functions. - -## [0.2.71] - 2024-06-25 -### Fixed -- Speed up twice the extraction function. - -## [0.2.6] - 2024-06-22 -### Fixed -- Fix issue #19: Update Dockerfile to ensure compatibility across multiple platforms. 
- -## [0.2.5] - 2024-06-18 -### Added -- Added five important hooks to the crawler: - - on_driver_created: Called when the driver is ready for initializations. - - before_get_url: Called right before Selenium fetches the URL. - - after_get_url: Called after Selenium fetches the URL. - - before_return_html: Called when the data is parsed and ready. - - on_user_agent_updated: Called when the user changes the user_agent, causing the driver to reinitialize. -- Added an example in `quickstart.py` in the example folder under the docs. -- Enhancement issue #24: Replaced inline HTML tags (e.g., DEL, INS, SUB, ABBR) with textual format for better context handling in LLM. -- Maintaining the semantic context of inline tags (e.g., abbreviation, DEL, INS) for improved LLM-friendliness. -- Updated Dockerfile to ensure compatibility across multiple platforms (Hopefully!). - -## [0.2.4] - 2024-06-17 -### Fixed -- Fix issue #22: Use MD5 hash for caching HTML files to handle long URLs diff --git a/docs/md/contact.md b/docs/md/contact.md deleted file mode 100644 index 85faa21..0000000 --- a/docs/md/contact.md +++ /dev/null @@ -1,25 +0,0 @@ -# Contact -If you have any questions, suggestions, or feedback, please feel free to reach out to us: - -- GitHub: [unclecode](https://github.com/unclecode) -- Twitter: [@unclecode](https://twitter.com/unclecode) -- Website: [crawl4ai.com](https://crawl4ai.com) - - -## Contributing 🤝 - -We welcome contributions from the open-source community to help improve Crawl4AI and make it even more valuable for AI enthusiasts and developers. To contribute, please follow these steps: - -1. Fork the repository. -2. Create a new branch for your feature or bug fix. -3. Make your changes and commit them with descriptive messages. -4. Push your changes to your forked repository. -5. Submit a pull request to the main repository. - -For more information on contributing, please see our [contribution guidelines](https://github.com/unclecode/crawl4ai/blob/main/CONTRIBUTING.md). - -## License 📄 - -Crawl4AI is released under the [Apache 2.0 License](https://github.com/unclecode/crawl4ai/blob/main/LICENSE). - -Let's work together to make the web more accessible and useful for AI applications! 💪🌐🤖 diff --git a/docs/md/demo.md b/docs/md/demo.md deleted file mode 100644 index 4ca6c75..0000000 --- a/docs/md/demo.md +++ /dev/null @@ -1,231 +0,0 @@ -# Interactive Demo for Crowler -

    - Enter URL and Options; loading indicator; Response tabs: Markdown, Cleaned HTML, Media, Extracted Content, Screenshot, Python Code
    \ No newline at end of file diff --git a/docs/md/examples/index.md b/docs/md/examples/index.md deleted file mode 100644 index a663415..0000000 --- a/docs/md/examples/index.md +++ /dev/null @@ -1,33 +0,0 @@ -# Examples - -Welcome to the examples section of Crawl4AI documentation! In this section, you will find practical examples demonstrating how to use Crawl4AI for various web crawling and data extraction tasks. Each example is designed to showcase different features and capabilities of the library. - -## Examples Index - -### [LLM Extraction](llm_extraction.md) - -This example demonstrates how to use Crawl4AI to extract information using Large Language Models (LLMs). You will learn how to configure the `LLMExtractionStrategy` to get structured data from web pages. - -### [JSON CSS Extraction](json_css_extraction.md) - -This example demonstrates how to use Crawl4AI to extract structured data without using LLM, and just focusing on page structure. You will learn how to use the `JsonCssExtractionStrategy` to extract data using CSS selectors. - -### [JS Execution & CSS Filtering](js_execution_css_filtering.md) - -Learn how to execute custom JavaScript code and filter data using CSS selectors. This example shows how to perform complex web interactions and extract specific content from web pages. - -### [Hooks & Auth](hooks_auth.md) - -This example covers the use of custom hooks for authentication and other pre-crawling tasks. You will see how to set up hooks to modify headers, authenticate sessions, and perform other preparatory actions before crawling. - -### [Summarization](summarization.md) - -Discover how to use Crawl4AI to summarize web page content. This example demonstrates the summarization capabilities of the library, helping you extract concise information from lengthy web pages. - -### [Research Assistant](research_assistant.md) - -In this example, Crawl4AI is used as a research assistant to gather and organize information from multiple sources. You will learn how to use various extraction and chunking strategies to compile a comprehensive report. - ---- - -Each example includes detailed explanations and code snippets to help you understand and implement the features in your projects. Click on the links to explore each example and start making the most of Crawl4AI! diff --git a/docs/md/examples/js_execution_css_filtering.md b/docs/md/examples/js_execution_css_filtering.md deleted file mode 100644 index b45a660..0000000 --- a/docs/md/examples/js_execution_css_filtering.md +++ /dev/null @@ -1,104 +0,0 @@ -# JS Execution & CSS Filtering with AsyncWebCrawler - -In this example, we'll demonstrate how to use Crawl4AI's AsyncWebCrawler to execute JavaScript, filter data with CSS selectors, and use a cosine similarity strategy to extract relevant content. This approach is particularly useful when you need to interact with dynamic content on web pages, such as clicking "Load More" buttons. 
- -## Example: Extracting Structured Data Asynchronously - -```python -import asyncio -from crawl4ai import AsyncWebCrawler -from crawl4ai.chunking_strategy import RegexChunking -from crawl4ai.extraction_strategy import CosineStrategy -from crawl4ai.async_crawler_strategy import AsyncPlaywrightCrawlerStrategy - -async def main(): - # Define the JavaScript code to click the "Load More" button - js_code = """ - const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); - if (loadMoreButton) { - loadMoreButton.click(); - // Wait for new content to load - await new Promise(resolve => setTimeout(resolve, 2000)); - } - """ - - # Define a wait_for function to ensure content is loaded - wait_for = """ - () => { - const articles = document.querySelectorAll('article.tease-card'); - return articles.length > 10; - } - """ - - async with AsyncWebCrawler(verbose=True) as crawler: - # Run the crawler with keyword filtering and CSS selector - result = await crawler.arun( - url="https://www.nbcnews.com/business", - js_code=js_code, - wait_for=wait_for, - css_selector="article.tease-card", - extraction_strategy=CosineStrategy( - semantic_filter="technology", - ), - chunking_strategy=RegexChunking(), - ) - - # Display the extracted result - print(result.extracted_content) - -# Run the async function -asyncio.run(main()) -``` - -### Explanation - -1. **Asynchronous Execution**: We use `AsyncWebCrawler` with async/await syntax for non-blocking execution. - -2. **JavaScript Execution**: The `js_code` variable contains JavaScript code that simulates clicking a "Load More" button and waits for new content to load. - -3. **Wait Condition**: The `wait_for` function ensures that the page has loaded more than 10 articles before proceeding with the extraction. - -4. **CSS Selector**: The `css_selector="article.tease-card"` parameter ensures that only article cards are extracted from the web page. - -5. **Extraction Strategy**: The `CosineStrategy` is used with a semantic filter for "technology" to extract relevant content based on cosine similarity. - -6. **Chunking Strategy**: We use `RegexChunking()` to split the content into manageable chunks for processing. - -## Advanced Usage: Custom Session and Multiple Requests - -For more complex scenarios where you need to maintain state across multiple requests or execute additional JavaScript after the initial page load, you can use a custom session: - -```python -async def advanced_crawl(): - async with AsyncWebCrawler(verbose=True) as crawler: - # Initial crawl with custom session - result1 = await crawler.arun( - url="https://www.nbcnews.com/business", - js_code=js_code, - wait_for=wait_for, - css_selector="article.tease-card", - session_id="business_session" - ) - - # Execute additional JavaScript in the same session - result2 = await crawler.crawler_strategy.execute_js( - session_id="business_session", - js_code="window.scrollTo(0, document.body.scrollHeight);", - wait_for_js="() => window.innerHeight + window.scrollY >= document.body.offsetHeight" - ) - - # Process results - print("Initial crawl result:", result1.extracted_content) - print("Additional JS execution result:", result2.html) - -asyncio.run(advanced_crawl()) -``` - -This advanced example demonstrates how to: -1. Use a custom session to maintain state across requests. -2. Execute additional JavaScript after the initial page load. -3. Wait for specific conditions using JavaScript functions. 
- -## Try It Yourself - -These examples demonstrate the power and flexibility of Crawl4AI's AsyncWebCrawler in handling complex web interactions and extracting meaningful data asynchronously. You can customize the JavaScript code, CSS selectors, extraction strategies, and waiting conditions to suit your specific requirements. \ No newline at end of file diff --git a/docs/md/examples/research_assistant.md b/docs/md/examples/research_assistant.md deleted file mode 100644 index 61886f9..0000000 --- a/docs/md/examples/research_assistant.md +++ /dev/null @@ -1,220 +0,0 @@ -# Research Assistant Example with AsyncWebCrawler - -This example demonstrates how to build an advanced research assistant using `Chainlit`, `Crawl4AI`'s `AsyncWebCrawler`, and various AI services. The assistant can crawl web pages asynchronously, answer questions based on the crawled content, and handle audio inputs. - -## Step-by-Step Guide - -1. **Install Required Packages** - - Ensure you have the necessary packages installed: - - ```bash - pip install chainlit groq openai crawl4ai - ``` - -2. **Import Libraries** - - ```python - import os - import time - import asyncio - from openai import AsyncOpenAI - import chainlit as cl - import re - from io import BytesIO - from chainlit.element import ElementBased - from groq import Groq - from crawl4ai import AsyncWebCrawler - from crawl4ai.extraction_strategy import NoExtractionStrategy - from crawl4ai.chunking_strategy import RegexChunking - - client = AsyncOpenAI(base_url="https://api.groq.com/openai/v1", api_key=os.getenv("GROQ_API_KEY")) - - # Instrument the OpenAI client - cl.instrument_openai() - ``` - -3. **Set Configuration** - - ```python - settings = { - "model": "llama3-8b-8192", - "temperature": 0.5, - "max_tokens": 500, - "top_p": 1, - "frequency_penalty": 0, - "presence_penalty": 0, - } - ``` - -4. **Define Utility Functions** - - ```python - def extract_urls(text): - url_pattern = re.compile(r'(https?://\S+)') - return url_pattern.findall(text) - - async def crawl_urls(urls): - async with AsyncWebCrawler(verbose=True) as crawler: - results = await crawler.arun_many( - urls=urls, - word_count_threshold=10, - extraction_strategy=NoExtractionStrategy(), - chunking_strategy=RegexChunking(), - bypass_cache=True - ) - return [result.markdown for result in results if result.success] - ``` - -5. **Initialize Chat Start Event** - - ```python - @cl.on_chat_start - async def on_chat_start(): - cl.user_session.set("session", { - "history": [], - "context": {} - }) - await cl.Message(content="Welcome to the chat! How can I assist you today?").send() - ``` - -6. **Handle Incoming Messages** - - ```python - @cl.on_message - async def on_message(message: cl.Message): - user_session = cl.user_session.get("session") - - # Extract URLs from the user's message - urls = extract_urls(message.content) - - if urls: - crawled_contents = await crawl_urls(urls) - for url, content in zip(urls, crawled_contents): - ref_number = f"REF_{len(user_session['context']) + 1}" - user_session["context"][ref_number] = { - "url": url, - "content": content - } - - user_session["history"].append({ - "role": "user", - "content": message.content - }) - - # Create a system message that includes the context - context_messages = [ - f'\n{data["content"]}\n' - for ref, data in user_session["context"].items() - ] - system_message = { - "role": "system", - "content": ( - "You are a helpful bot. Use the following context for answering questions. 
" - "Refer to the sources using the REF number in square brackets, e.g., [1], only if the source is given in the appendices below.\n\n" - "If the question requires any information from the provided appendices or context, refer to the sources. " - "If not, there is no need to add a references section. " - "At the end of your response, provide a reference section listing the URLs and their REF numbers only if sources from the appendices were used.\n\n" - "\n\n".join(context_messages) - ) if context_messages else "You are a helpful assistant." - } - - msg = cl.Message(content="") - await msg.send() - - # Get response from the LLM - stream = await client.chat.completions.create( - messages=[system_message, *user_session["history"]], - stream=True, - **settings - ) - - assistant_response = "" - async for part in stream: - if token := part.choices[0].delta.content: - assistant_response += token - await msg.stream_token(token) - - # Add assistant message to the history - user_session["history"].append({ - "role": "assistant", - "content": assistant_response - }) - await msg.update() - - # Append the reference section to the assistant's response - if user_session["context"]: - reference_section = "\n\nReferences:\n" - for ref, data in user_session["context"].items(): - reference_section += f"[{ref.split('_')[1]}]: {data['url']}\n" - msg.content += reference_section - await msg.update() - ``` - -7. **Handle Audio Input** - - ```python - @cl.on_audio_chunk - async def on_audio_chunk(chunk: cl.AudioChunk): - if chunk.isStart: - buffer = BytesIO() - buffer.name = f"input_audio.{chunk.mimeType.split('/')[1]}" - cl.user_session.set("audio_buffer", buffer) - cl.user_session.set("audio_mime_type", chunk.mimeType) - cl.user_session.get("audio_buffer").write(chunk.data) - - @cl.step(type="tool") - async def speech_to_text(audio_file): - response = await client.audio.transcriptions.create( - model="whisper-large-v3", file=audio_file - ) - return response.text - - @cl.on_audio_end - async def on_audio_end(elements: list[ElementBased]): - audio_buffer: BytesIO = cl.user_session.get("audio_buffer") - audio_buffer.seek(0) - audio_file = audio_buffer.read() - audio_mime_type: str = cl.user_session.get("audio_mime_type") - - start_time = time.time() - transcription = await speech_to_text((audio_buffer.name, audio_file, audio_mime_type)) - end_time = time.time() - print(f"Transcription took {end_time - start_time} seconds") - - user_msg = cl.Message(author="You", type="user_message", content=transcription) - await user_msg.send() - await on_message(user_msg) - ``` - -8. **Run the Chat Application** - - ```python - if __name__ == "__main__": - from chainlit.cli import run_chainlit - run_chainlit(__file__) - ``` - -## Explanation - -- **Libraries and Configuration**: We import necessary libraries, including `AsyncWebCrawler` from `crawl4ai`. -- **Utility Functions**: - - `extract_urls`: Uses regex to find URLs in messages. - - `crawl_urls`: An asynchronous function that uses `AsyncWebCrawler` to fetch content from multiple URLs concurrently. -- **Chat Start Event**: Initializes the chat session and sends a welcome message. -- **Message Handling**: - - Extracts URLs from user messages. - - Asynchronously crawls the URLs using `AsyncWebCrawler`. - - Updates chat history and context with crawled content. - - Generates a response using the LLM, incorporating the crawled context. -- **Audio Handling**: Captures, buffers, and transcribes audio input, then processes the transcription as text. 
-- **Running the Application**: Starts the Chainlit server for interaction with the assistant. - -## Key Improvements - -1. **Asynchronous Web Crawling**: Using `AsyncWebCrawler` allows for efficient, concurrent crawling of multiple URLs. -2. **Improved Context Management**: The assistant now maintains a context of crawled content, allowing for more informed responses. -3. **Dynamic Reference System**: The assistant can refer to specific sources in its responses and provide a reference section. -4. **Seamless Audio Integration**: The ability to handle audio inputs makes the assistant more versatile and user-friendly. - -This updated Research Assistant showcases how to create a powerful, interactive tool that can efficiently fetch and process web content, handle various input types, and provide informed responses based on the gathered information. \ No newline at end of file diff --git a/docs/md/examples/summarization.md b/docs/md/examples/summarization.md deleted file mode 100644 index 12c1847..0000000 --- a/docs/md/examples/summarization.md +++ /dev/null @@ -1,153 +0,0 @@ -# Summarization Example with AsyncWebCrawler - -This example demonstrates how to use Crawl4AI's `AsyncWebCrawler` to extract a summary from a web page asynchronously. The goal is to obtain the title, a detailed summary, a brief summary, and a list of keywords from the given page. - -## Step-by-Step Guide - -1. **Import Necessary Modules** - - First, import the necessary modules and classes: - - ```python - import os - import json - import asyncio - from crawl4ai import AsyncWebCrawler - from crawl4ai.extraction_strategy import LLMExtractionStrategy - from crawl4ai.chunking_strategy import RegexChunking - from pydantic import BaseModel, Field - ``` - -2. **Define the URL to be Crawled** - - Set the URL of the web page you want to summarize: - - ```python - url = 'https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot' - ``` - -3. **Define the Data Model** - - Use Pydantic to define the structure of the extracted data: - - ```python - class PageSummary(BaseModel): - title: str = Field(..., description="Title of the page.") - summary: str = Field(..., description="Summary of the page.") - brief_summary: str = Field(..., description="Brief summary of the page.") - keywords: list = Field(..., description="Keywords assigned to the page.") - ``` - -4. **Create the Extraction Strategy** - - Set up the `LLMExtractionStrategy` with the necessary parameters: - - ```python - extraction_strategy = LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv('OPENAI_API_KEY'), - schema=PageSummary.model_json_schema(), - extraction_type="schema", - apply_chunking=False, - instruction=( - "From the crawled content, extract the following details: " - "1. Title of the page " - "2. Summary of the page, which is a detailed summary " - "3. Brief summary of the page, which is a paragraph text " - "4. Keywords assigned to the page, which is a list of keywords. " - 'The extracted JSON format should look like this: ' - '{ "title": "Page Title", "summary": "Detailed summary of the page.", ' - '"brief_summary": "Brief summary in a paragraph.", "keywords": ["keyword1", "keyword2", "keyword3"] }' - ) - ) - ``` - -5. 
**Define the Async Crawl Function** - - Create an asynchronous function to run the crawler: - - ```python - async def crawl_and_summarize(url): - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun( - url=url, - word_count_threshold=1, - extraction_strategy=extraction_strategy, - chunking_strategy=RegexChunking(), - bypass_cache=True, - ) - return result - ``` - -6. **Run the Crawler and Process Results** - - Use asyncio to run the crawler and process the results: - - ```python - async def main(): - result = await crawl_and_summarize(url) - - if result.success: - page_summary = json.loads(result.extracted_content) - print("Extracted Page Summary:") - print(json.dumps(page_summary, indent=2)) - - # Save the extracted data - with open(".data/page_summary.json", "w", encoding="utf-8") as f: - json.dump(page_summary, f, indent=2) - print("Page summary saved to .data/page_summary.json") - else: - print(f"Failed to crawl and summarize the page. Error: {result.error_message}") - - # Run the async main function - asyncio.run(main()) - ``` - -## Explanation - -- **Importing Modules**: We import the necessary modules, including `AsyncWebCrawler` and `LLMExtractionStrategy` from Crawl4AI. -- **URL Definition**: We set the URL of the web page to crawl and summarize. -- **Data Model Definition**: We define the structure of the data to extract using Pydantic's `BaseModel`. -- **Extraction Strategy Setup**: We create an instance of `LLMExtractionStrategy` with the schema and detailed instructions for the extraction process. -- **Async Crawl Function**: We define an asynchronous function `crawl_and_summarize` that uses `AsyncWebCrawler` to perform the crawling and extraction. -- **Main Execution**: In the `main` function, we run the crawler, process the results, and save the extracted data. - -## Advanced Usage: Crawling Multiple URLs - -To demonstrate the power of `AsyncWebCrawler`, here's how you can summarize multiple pages concurrently: - -```python -async def crawl_multiple_urls(urls): - async with AsyncWebCrawler(verbose=True) as crawler: - tasks = [crawler.arun( - url=url, - word_count_threshold=1, - extraction_strategy=extraction_strategy, - chunking_strategy=RegexChunking(), - bypass_cache=True - ) for url in urls] - results = await asyncio.gather(*tasks) - return results - -async def main(): - urls = [ - 'https://marketplace.visualstudio.com/items?itemName=Unclecode.groqopilot', - 'https://marketplace.visualstudio.com/items?itemName=GitHub.copilot', - 'https://marketplace.visualstudio.com/items?itemName=ms-python.python' - ] - results = await crawl_multiple_urls(urls) - - for i, result in enumerate(results): - if result.success: - page_summary = json.loads(result.extracted_content) - print(f"\nSummary for URL {i+1}:") - print(json.dumps(page_summary, indent=2)) - else: - print(f"\nFailed to summarize URL {i+1}. Error: {result.error_message}") - -asyncio.run(main()) -``` - -This advanced example shows how to use `AsyncWebCrawler` to efficiently summarize multiple web pages concurrently, significantly reducing the total processing time compared to sequential crawling. - -By leveraging the asynchronous capabilities of Crawl4AI, you can perform advanced web crawling and data extraction tasks with improved efficiency and scalability. 
\ No newline at end of file diff --git a/docs/md/full_details/advanced_features.md b/docs/md/full_details/advanced_features.md deleted file mode 100644 index 944f6e0..0000000 --- a/docs/md/full_details/advanced_features.md +++ /dev/null @@ -1,138 +0,0 @@ -# Advanced Features - -Crawl4AI offers a range of advanced features that allow you to fine-tune your web crawling and data extraction process. This section will cover some of these advanced features, including taking screenshots, extracting media and links, customizing the user agent, using custom hooks, and leveraging CSS selectors. - -## Taking Screenshots 📸 - -One of the cool features of Crawl4AI is the ability to take screenshots of the web pages you're crawling. This can be particularly useful for visual verification or for capturing the state of dynamic content. - -Here's how you can take a screenshot: - -```python -from crawl4ai import WebCrawler -import base64 - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with the screenshot parameter -result = crawler.run(url="https://www.nbcnews.com/business", screenshot=True) - -# Save the screenshot to a file -with open("screenshot.png", "wb") as f: - f.write(base64.b64decode(result.screenshot)) - -print("Screenshot saved to 'screenshot.png'!") -``` - -In this example, we create a `WebCrawler` instance, warm it up, and then run it with the `screenshot` parameter set to `True`. The screenshot is saved as a base64 encoded string in the result, which we then decode and save as a PNG file. - -## Extracting Media and Links 🎨🔗 - -Crawl4AI can extract all media tags (images, audio, and video) and links (both internal and external) from a web page. This feature is useful for collecting multimedia content or analyzing link structures. - -Here's an example: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler -result = crawler.run(url="https://www.nbcnews.com/business") - -print("Extracted media:", result.media) -print("Extracted links:", result.links) -``` - -In this example, the `result` object contains dictionaries for media and links, which you can access and use as needed. - -## Customizing the User Agent 🕵️‍♂️ - -Crawl4AI allows you to set a custom user agent for your HTTP requests. This can help you avoid detection by web servers or simulate different browsing environments. - -Here's how to set a custom user agent: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with a custom user agent -result = crawler.run(url="https://www.nbcnews.com/business", user_agent="Mozilla/5.0 (compatible; MyCrawler/1.0)") - -print("Crawl result:", result) -``` - -In this example, we specify a custom user agent string when running the crawler. - -## Using Custom Hooks 🪝 - -Hooks are a powerful feature in Crawl4AI that allow you to customize the crawling process at various stages. You can define hooks for actions such as driver initialization, before and after URL fetching, and before returning the HTML. 
- -Here's an example of using hooks: - -```python -from crawl4ai import WebCrawler -from selenium.webdriver.common.by import By -from selenium.webdriver.support.ui import WebDriverWait -from selenium.webdriver.support import expected_conditions as EC - -# Define the hooks -def on_driver_created(driver): - driver.maximize_window() - driver.get('https://example.com/login') - WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.NAME, 'username'))).send_keys('testuser') - driver.find_element(By.NAME, 'password').send_keys('password123') - driver.find_element(By.NAME, 'login').click() - return driver - -def before_get_url(driver): - driver.execute_cdp_cmd('Network.setExtraHTTPHeaders', {'headers': {'X-Test-Header': 'test'}}) - return driver - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Set the hooks -crawler.set_hook('on_driver_created', on_driver_created) -crawler.set_hook('before_get_url', before_get_url) - -# Run the crawler -result = crawler.run(url="https://example.com") - -print("Crawl result:", result) -``` - -In this example, we define hooks to handle driver initialization and custom headers before fetching the URL. - -## Using CSS Selectors 🎯 - -CSS selectors allow you to target specific elements on a web page for extraction. This can be useful for scraping structured content, such as articles or product details. - -Here's an example of using a CSS selector: - -```python -from crawl4ai import WebCrawler - -# Create the WebCrawler instance -crawler = WebCrawler() -crawler.warmup() - -# Run the crawler with a CSS selector to extract only H2 tags -result = crawler.run(url="https://www.nbcnews.com/business", css_selector="h2") - -print("Extracted H2 tags:", result.extracted_content) -``` - -In this example, we use the `css_selector` parameter to extract only the H2 tags from the web page. - ---- - -With these advanced features, you can leverage Crawl4AI to perform sophisticated web crawling and data extraction tasks. Whether you need to take screenshots, extract specific elements, customize the crawling process, or set custom headers, Crawl4AI provides the flexibility and power to meet your needs. Happy crawling! 🕷️🚀 diff --git a/docs/md/full_details/chunking_strategies.md b/docs/md/full_details/chunking_strategies.md deleted file mode 100644 index f429310..0000000 --- a/docs/md/full_details/chunking_strategies.md +++ /dev/null @@ -1,133 +0,0 @@ -## Chunking Strategies 📚 - -Crawl4AI provides several powerful chunking strategies to divide text into manageable parts for further processing. Each strategy has unique characteristics and is suitable for different scenarios. Let's explore them one by one. - -### RegexChunking - -`RegexChunking` splits text using regular expressions. This is ideal for creating chunks based on specific patterns like paragraphs or sentences. - -#### When to Use -- Great for structured text with consistent delimiters. -- Suitable for documents where specific patterns (e.g., double newlines, periods) indicate logical chunks. - -#### Parameters -- `patterns` (list, optional): Regular expressions used to split the text. Default is to split by double newlines (`['\n\n']`). - -#### Example -```python -from crawl4ai.chunking_strategy import RegexChunking - -# Define patterns for splitting text -patterns = [r'\n\n', r'\. '] -chunker = RegexChunking(patterns=patterns) - -# Sample text -text = "This is a sample text. It will be split into chunks.\n\nThis is another paragraph." 
- -# Chunk the text -chunks = chunker.chunk(text) -print(chunks) -``` - -### NlpSentenceChunking - -`NlpSentenceChunking` uses NLP models to split text into sentences, ensuring accurate sentence boundaries. - -#### When to Use -- Ideal for texts where sentence boundaries are crucial. -- Useful for creating chunks that preserve grammatical structures. - -#### Parameters -- None. - -#### Example -```python -from crawl4ai.chunking_strategy import NlpSentenceChunking - -chunker = NlpSentenceChunking() - -# Sample text -text = "This is a sample text. It will be split into sentences. Here's another sentence." - -# Chunk the text -chunks = chunker.chunk(text) -print(chunks) -``` - -### TopicSegmentationChunking - -`TopicSegmentationChunking` employs the TextTiling algorithm to segment text into topic-based chunks. This method identifies thematic boundaries. - -#### When to Use -- Perfect for long documents with distinct topics. -- Useful when preserving topic continuity is more important than maintaining text order. - -#### Parameters -- `num_keywords` (int, optional): Number of keywords for each topic segment. Default is `3`. - -#### Example -```python -from crawl4ai.chunking_strategy import TopicSegmentationChunking - -chunker = TopicSegmentationChunking(num_keywords=3) - -# Sample text -text = "This document contains several topics. Topic one discusses AI. Topic two covers machine learning." - -# Chunk the text -chunks = chunker.chunk(text) -print(chunks) -``` - -### FixedLengthWordChunking - -`FixedLengthWordChunking` splits text into chunks based on a fixed number of words. This ensures each chunk has approximately the same length. - -#### When to Use -- Suitable for processing large texts where uniform chunk size is important. -- Useful when the number of words per chunk needs to be controlled. - -#### Parameters -- `chunk_size` (int, optional): Number of words per chunk. Default is `100`. - -#### Example -```python -from crawl4ai.chunking_strategy import FixedLengthWordChunking - -chunker = FixedLengthWordChunking(chunk_size=10) - -# Sample text -text = "This is a sample text. It will be split into chunks of fixed length." - -# Chunk the text -chunks = chunker.chunk(text) -print(chunks) -``` - -### SlidingWindowChunking - -`SlidingWindowChunking` uses a sliding window approach to create overlapping chunks. Each chunk has a fixed length, and the window slides by a specified step size. - -#### When to Use -- Ideal for creating overlapping chunks to preserve context. -- Useful for tasks where context from adjacent chunks is needed. - -#### Parameters -- `window_size` (int, optional): Number of words in each chunk. Default is `100`. -- `step` (int, optional): Number of words to slide the window. Default is `50`. - -#### Example -```python -from crawl4ai.chunking_strategy import SlidingWindowChunking - -chunker = SlidingWindowChunking(window_size=10, step=5) - -# Sample text -text = "This is a sample text. It will be split using a sliding window approach to preserve context." - -# Chunk the text -chunks = chunker.chunk(text) -print(chunks) -``` - -With these chunking strategies, you can choose the best method to divide your text based on your specific needs. Whether you need precise sentence boundaries, topic-based segmentation, or uniform chunk sizes, Crawl4AI has you covered. Happy chunking! 
📝✨ diff --git a/docs/md/full_details/crawl_request_parameters.md b/docs/md/full_details/crawl_request_parameters.md deleted file mode 100644 index 896097d..0000000 --- a/docs/md/full_details/crawl_request_parameters.md +++ /dev/null @@ -1,179 +0,0 @@ -# Crawl Request Parameters for AsyncWebCrawler - -The `arun` method in Crawl4AI's `AsyncWebCrawler` is designed to be highly configurable, allowing you to customize the crawling and extraction process to suit your needs. Below are the parameters you can use with the `arun` method, along with their descriptions, possible values, and examples. - -## Parameters - -### url (str) -**Description:** The URL of the webpage to crawl. -**Required:** Yes -**Example:** -```python -url = "https://www.nbcnews.com/business" -``` - -### word_count_threshold (int) -**Description:** The minimum number of words a block must contain to be considered meaningful. The default value is defined by `MIN_WORD_THRESHOLD`. -**Required:** No -**Default Value:** `MIN_WORD_THRESHOLD` -**Example:** -```python -word_count_threshold = 10 -``` - -### extraction_strategy (ExtractionStrategy) -**Description:** The strategy to use for extracting content from the HTML. It must be an instance of `ExtractionStrategy`. If not provided, the default is `NoExtractionStrategy`. -**Required:** No -**Default Value:** `NoExtractionStrategy()` -**Example:** -```python -extraction_strategy = CosineStrategy(semantic_filter="finance") -``` - -### chunking_strategy (ChunkingStrategy) -**Description:** The strategy to use for chunking the text before processing. It must be an instance of `ChunkingStrategy`. The default value is `RegexChunking()`. -**Required:** No -**Default Value:** `RegexChunking()` -**Example:** -```python -chunking_strategy = NlpSentenceChunking() -``` - -### bypass_cache (bool) -**Description:** Whether to force a fresh crawl even if the URL has been previously crawled. The default value is `False`. -**Required:** No -**Default Value:** `False` -**Example:** -```python -bypass_cache = True -``` - -### css_selector (str) -**Description:** The CSS selector to target specific parts of the HTML for extraction. If not provided, the entire HTML will be processed. -**Required:** No -**Default Value:** `None` -**Example:** -```python -css_selector = "div.article-content" -``` - -### screenshot (bool) -**Description:** Whether to take screenshots of the page. The default value is `False`. -**Required:** No -**Default Value:** `False` -**Example:** -```python -screenshot = True -``` - -### user_agent (str) -**Description:** The user agent to use for the HTTP requests. If not provided, a default user agent will be used. -**Required:** No -**Default Value:** `None` -**Example:** -```python -user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" -``` - -### verbose (bool) -**Description:** Whether to enable verbose logging. The default value is `True`. -**Required:** No -**Default Value:** `True` -**Example:** -```python -verbose = True -``` - -### **kwargs -Additional keyword arguments that can be passed to customize the crawling process further. Some notable options include: - -- **only_text (bool):** Whether to extract only text content, excluding HTML tags. Default is `False`. -- **session_id (str):** A unique identifier for the crawling session. This is useful for maintaining state across multiple requests. -- **js_code (str or list):** JavaScript code to be executed on the page before extraction. 
-- **wait_for (str):** A CSS selector or JavaScript function to wait for before considering the page load complete. - -**Example:** -```python -result = await crawler.arun( - url="https://www.nbcnews.com/business", - css_selector="p", - only_text=True, - session_id="unique_session_123", - js_code="window.scrollTo(0, document.body.scrollHeight);", - wait_for="article.main-article" -) -``` - -## Example Usage - -Here's an example of how to use the `arun` method with various parameters: - -```python -import asyncio -from crawl4ai import AsyncWebCrawler -from crawl4ai.extraction_strategy import CosineStrategy -from crawl4ai.chunking_strategy import NlpSentenceChunking - -async def main(): - # Create the AsyncWebCrawler instance - async with AsyncWebCrawler(verbose=True) as crawler: - # Run the crawler with custom parameters - result = await crawler.arun( - url="https://www.nbcnews.com/business", - word_count_threshold=10, - extraction_strategy=CosineStrategy(semantic_filter="finance"), - chunking_strategy=NlpSentenceChunking(), - bypass_cache=True, - css_selector="div.article-content", - screenshot=True, - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", - verbose=True, - only_text=True, - session_id="business_news_session", - js_code="window.scrollTo(0, document.body.scrollHeight);", - wait_for="footer" - ) - - print(result) - -# Run the async function -asyncio.run(main()) -``` - -This example demonstrates how to configure various parameters to customize the crawling and extraction process using the asynchronous version of Crawl4AI. - -## Additional Asynchronous Methods - -The `AsyncWebCrawler` class also provides other useful asynchronous methods: - -### arun_many -**Description:** Crawl multiple URLs concurrently. -**Example:** -```python -urls = ["https://example1.com", "https://example2.com", "https://example3.com"] -results = await crawler.arun_many(urls, word_count_threshold=10, bypass_cache=True) -``` - -### aclear_cache -**Description:** Clear the crawler's cache. -**Example:** -```python -await crawler.aclear_cache() -``` - -### aflush_cache -**Description:** Completely flush the crawler's cache. -**Example:** -```python -await crawler.aflush_cache() -``` - -### aget_cache_size -**Description:** Get the current size of the cache. -**Example:** -```python -cache_size = await crawler.aget_cache_size() -print(f"Current cache size: {cache_size}") -``` - -These asynchronous methods allow for efficient and flexible use of the AsyncWebCrawler in various scenarios. \ No newline at end of file diff --git a/docs/md/full_details/crawl_result_class.md b/docs/md/full_details/crawl_result_class.md deleted file mode 100644 index e2c545f..0000000 --- a/docs/md/full_details/crawl_result_class.md +++ /dev/null @@ -1,104 +0,0 @@ -# Crawl Result - -The `CrawlResult` class is the heart of Crawl4AI's output, encapsulating all the data extracted from a crawling session. This class contains various fields that store the results of the web crawling and extraction process. Let's break down each field and see what it holds. 
🎉 - -## Class Definition - -```python -from pydantic import BaseModel -from typing import Dict, List, Optional - -class CrawlResult(BaseModel): - url: str - html: str - success: bool - cleaned_html: Optional[str] = None - media: Dict[str, List[Dict]] = {} - links: Dict[str, List[Dict]] = {} - screenshot: Optional[str] = None - markdown: Optional[str] = None - extracted_content: Optional[str] = None - metadata: Optional[dict] = None - error_message: Optional[str] = None - session_id: Optional[str] = None - responser_headers: Optional[dict] = None - status_code: Optional[int] = None -``` - -## Fields Explanation - -### `url: str` -The URL that was crawled. This field simply stores the URL of the web page that was processed. - -### `html: str` -The raw HTML content of the web page. This is the unprocessed HTML source as retrieved by the crawler. - -### `success: bool` -A flag indicating whether the crawling and extraction were successful. If any error occurs during the process, this will be `False`. - -### `cleaned_html: Optional[str]` -The cleaned HTML content of the web page. This field holds the HTML after removing unwanted tags like ` diff --git a/docs/md/introduction.md b/docs/md/introduction.md deleted file mode 100644 index 6d1ad56..0000000 --- a/docs/md/introduction.md +++ /dev/null @@ -1,29 +0,0 @@ -# Introduction - -Welcome to the documentation for Crawl4AI v0.2.5! 🕷️🤖 - -Crawl4AI is designed to simplify the process of crawling web pages and extracting useful information for large language models (LLMs) and AI applications. Whether you're using it as a REST API, a Python library, or through a Google Colab notebook, Crawl4AI provides powerful features to make web data extraction easier and more efficient. - -## Key Features ✨ - -- **🆓 Completely Free and Open-Source**: Crawl4AI is free to use and open-source, making it accessible for everyone. -- **🤖 LLM-Friendly Output Formats**: Supports JSON, cleaned HTML, and markdown formats. -- **🌍 Concurrent Crawling**: Crawl multiple URLs simultaneously to save time. -- **🎨 Media Extraction**: Extract all media tags including images, audio, and video. -- **🔗 Link Extraction**: Extract all external and internal links from web pages. -- **📚 Metadata Extraction**: Extract metadata from web pages for additional context. -- **🔄 Custom Hooks**: Define custom hooks for authentication, headers, and page modifications before crawling. -- **🕵️ User Agent Support**: Customize the user agent for HTTP requests. -- **🖼️ Screenshot Capability**: Take screenshots of web pages during crawling. -- **📜 JavaScript Execution**: Execute custom JavaScripts before crawling. -- **📚 Advanced Chunking and Extraction Strategies**: Utilize topic-based, regex, sentence chunking, cosine clustering, and LLM extraction strategies. -- **🎯 CSS Selector Support**: Extract specific content using CSS selectors. -- **📝 Instruction/Keyword Refinement**: Pass instructions or keywords to refine the extraction process. - -Check the [Changelog](https://github.com/unclecode/crawl4ai/blob/main/CHANGELOG.md) for more details. - -## Power and Simplicity of Crawl4AI 🚀 - -Crawl4AI provides an easy way to crawl and extract data from web pages without installing any library. You can use the REST API on our server or run the local server on your machine. For more advanced control, use the Python library to customize your crawling and extraction strategies. - -Explore the documentation to learn more about the features, installation process, usage examples, and how to contribute to Crawl4AI. 
Let's make the web more accessible and useful for AI applications! 💪🌐🤖 diff --git a/docs/md/quickstart.md b/docs/md/quickstart.md deleted file mode 100644 index 39ab636..0000000 --- a/docs/md/quickstart.md +++ /dev/null @@ -1,285 +0,0 @@ -# Quick Start Guide 🚀 - -Welcome to the Crawl4AI Quickstart Guide! In this tutorial, we'll walk you through the basic usage of Crawl4AI with a friendly and humorous tone. We'll cover everything from basic usage to advanced features like chunking and extraction strategies, all with the power of asynchronous programming. Let's dive in! 🌟 - -## Getting Started 🛠️ - -First, let's import the necessary modules and create an instance of `AsyncWebCrawler`. We'll use an async context manager, which handles the setup and teardown of the crawler for us. - -```python -import asyncio -from crawl4ai import AsyncWebCrawler - -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - # We'll add our crawling code here - pass - -if __name__ == "__main__": - asyncio.run(main()) -``` - -### Basic Usage - -Simply provide a URL and let Crawl4AI do the magic! - -```python -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun(url="https://www.nbcnews.com/business") - print(f"Basic crawl result: {result.markdown[:500]}") # Print first 500 characters - -asyncio.run(main()) -``` - -### Taking Screenshots 📸 - -Let's take a screenshot of the page! - -```python -import base64 - -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun(url="https://www.nbcnews.com/business", screenshot=True) - with open("screenshot.png", "wb") as f: - f.write(base64.b64decode(result.screenshot)) - print("Screenshot saved to 'screenshot.png'!") - -asyncio.run(main()) -``` - -### Understanding Parameters 🧠 - -By default, Crawl4AI caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action. - -```python -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - # First crawl (caches the result) - result1 = await crawler.arun(url="https://www.nbcnews.com/business") - print(f"First crawl result: {result1.markdown[:100]}...") - - # Force to crawl again - result2 = await crawler.arun(url="https://www.nbcnews.com/business", bypass_cache=True) - print(f"Second crawl result: {result2.markdown[:100]}...") - -asyncio.run(main()) -``` - -### Adding a Chunking Strategy 🧩 - -Let's add a chunking strategy: `RegexChunking`! This strategy splits the text based on a given regex pattern. - -```python -from crawl4ai.chunking_strategy import RegexChunking - -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun( - url="https://www.nbcnews.com/business", - chunking_strategy=RegexChunking(patterns=["\n\n"]) - ) - print(f"RegexChunking result: {result.extracted_content[:200]}...") - -asyncio.run(main()) -``` - -### Adding an Extraction Strategy 🧠 - -Let's get smarter with an extraction strategy: `JsonCssExtractionStrategy`! This strategy extracts structured data from HTML using CSS selectors. 
- -```python -from crawl4ai.extraction_strategy import JsonCssExtractionStrategy -import json - -async def main(): - schema = { - "name": "News Articles", - "baseSelector": "article.tease-card", - "fields": [ - { - "name": "title", - "selector": "h2", - "type": "text", - }, - { - "name": "summary", - "selector": "div.tease-card__info", - "type": "text", - } - ], - } - - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun( - url="https://www.nbcnews.com/business", - extraction_strategy=JsonCssExtractionStrategy(schema, verbose=True) - ) - extracted_data = json.loads(result.extracted_content) - print(f"Extracted {len(extracted_data)} articles") - print(json.dumps(extracted_data[0], indent=2)) - -asyncio.run(main()) -``` - -### Using LLMExtractionStrategy 🤖 - -Time to bring in the big guns: `LLMExtractionStrategy`! This strategy uses a large language model to extract relevant information from the web page. - -```python -from crawl4ai.extraction_strategy import LLMExtractionStrategy -import os -from pydantic import BaseModel, Field - -class OpenAIModelFee(BaseModel): - model_name: str = Field(..., description="Name of the OpenAI model.") - input_fee: str = Field(..., description="Fee for input token for the OpenAI model.") - output_fee: str = Field(..., description="Fee for output token for the OpenAI model.") - -async def main(): - if not os.getenv("OPENAI_API_KEY"): - print("OpenAI API key not found. Skipping this example.") - return - - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun( - url="https://openai.com/api/pricing/", - word_count_threshold=1, - extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", - api_token=os.getenv("OPENAI_API_KEY"), - schema=OpenAIModelFee.schema(), - extraction_type="schema", - instruction="""From the crawled content, extract all mentioned model names along with their fees for input and output tokens. - Do not miss any models in the entire content. One extracted model JSON format should look like this: - {"model_name": "GPT-4", "input_fee": "US$10.00 / 1M tokens", "output_fee": "US$30.00 / 1M tokens"}.""", - ), - bypass_cache=True, - ) - print(result.extracted_content) - -asyncio.run(main()) -``` - -### Interactive Extraction 🖱️ - -Let's use JavaScript to interact with the page before extraction! - -```python -async def main(): - js_code = """ - const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); - loadMoreButton && loadMoreButton.click(); - """ - - wait_for = """() => { - return Array.from(document.querySelectorAll('article.tease-card')).length > 10; - }""" - - async with AsyncWebCrawler(verbose=True) as crawler: - result = await crawler.arun( - url="https://www.nbcnews.com/business", - js_code=js_code, - wait_for=wait_for, - css_selector="article.tease-card", - bypass_cache=True, - ) - print(f"JavaScript interaction result: {result.extracted_content[:500]}") - -asyncio.run(main()) -``` - -### Advanced Session-Based Crawling with Dynamic Content 🔄 - -In modern web applications, content is often loaded dynamically without changing the URL. This is common in single-page applications (SPAs) or websites using infinite scrolling. Traditional crawling methods that rely on URL changes won't work here. That's where Crawl4AI's advanced session-based crawling comes in handy! - -Here's what makes this approach powerful: - -1. 
**Session Preservation**: By using a `session_id`, we can maintain the state of our crawling session across multiple interactions with the page. This is crucial for navigating through dynamically loaded content. - -2. **Asynchronous JavaScript Execution**: We can execute custom JavaScript to trigger content loading or navigation. In this example, we'll click a "Load More" button to fetch the next page of commits. - -3. **Dynamic Content Waiting**: The `wait_for` parameter allows us to specify a condition that must be met before considering the page load complete. This ensures we don't extract data before the new content is fully loaded. - -Let's see how this works with a real-world example: crawling multiple pages of commits on a GitHub repository. The URL doesn't change as we load more commits, so we'll use these advanced techniques to navigate and extract data. - -```python -import json -from bs4 import BeautifulSoup -from crawl4ai import AsyncWebCrawler -from crawl4ai.extraction_strategy import JsonCssExtractionStrategy - -async def main(): - async with AsyncWebCrawler(verbose=True) as crawler: - url = "https://github.com/microsoft/TypeScript/commits/main" - session_id = "typescript_commits_session" - all_commits = [] - - js_next_page = """ - const button = document.querySelector('a[data-testid="pagination-next-button"]'); - if (button) button.click(); - """ - - wait_for = """() => { - const commits = document.querySelectorAll('li.Box-sc-g0xbh4-0 h4'); - if (commits.length === 0) return false; - const firstCommit = commits[0].textContent.trim(); - return firstCommit !== window.lastCommit; - }""" - - schema = { - "name": "Commit Extractor", - "baseSelector": "li.Box-sc-g0xbh4-0", - "fields": [ - { - "name": "title", - "selector": "h4.markdown-title", - "type": "text", - "transform": "strip", - }, - ], - } - extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True) - - for page in range(3): # Crawl 3 pages - result = await crawler.arun( - url=url, - session_id=session_id, - css_selector="li.Box-sc-g0xbh4-0", - extraction_strategy=extraction_strategy, - js_code=js_next_page if page > 0 else None, - wait_for=wait_for if page > 0 else None, - js_only=page > 0, - bypass_cache=True, - headless=False, - ) - - assert result.success, f"Failed to crawl page {page + 1}" - - commits = json.loads(result.extracted_content) - all_commits.extend(commits) - - print(f"Page {page + 1}: Found {len(commits)} commits") - - await crawler.crawler_strategy.kill_session(session_id) - print(f"Successfully crawled {len(all_commits)} commits across 3 pages") - -asyncio.run(main()) -``` - -In this example, we're crawling multiple pages of commits from a GitHub repository. The URL doesn't change as we load more commits, so we use JavaScript to click the "Load More" button and a `wait_for` condition to ensure the new content is loaded before extraction. This powerful combination allows us to navigate and extract data from complex, dynamically-loaded web applications with ease! - -## Congratulations! 🎉 - -You've made it through the Crawl4AI Quickstart Guide! Now go forth and crawl the web asynchronously like a pro! 🕸️ - -Remember, these are just a few examples of what Crawl4AI can do. 
For more advanced usage, check out our other documentation pages: - -- [LLM Extraction](examples/llm_extraction.md) -- [JS Execution & CSS Filtering](examples/js_execution_css_filtering.md) -- [Hooks & Auth](examples/hooks_auth.md) -- [Summarization](examples/summarization.md) -- [Research Assistant](examples/research_assistant.md) - -Happy crawling! 🚀 \ No newline at end of file diff --git a/docs/md_v2/advanced/content-processing.md b/docs/md_v2/advanced/content-processing.md new file mode 100644 index 0000000..71a3243 --- /dev/null +++ b/docs/md_v2/advanced/content-processing.md @@ -0,0 +1,223 @@ +# Content Processing + +Crawl4AI provides powerful content processing capabilities that help you extract clean, relevant content from web pages. This guide covers content cleaning, media handling, link analysis, and metadata extraction. + +## Content Cleaning + +### Understanding Clean Content +When crawling web pages, you often encounter a lot of noise - advertisements, navigation menus, footers, popups, and other irrelevant content. Crawl4AI automatically cleans this noise using several approaches: + +1. **Basic Cleaning**: Removes unwanted HTML elements and attributes +2. **Content Relevance**: Identifies and preserves meaningful content blocks +3. **Layout Analysis**: Understands page structure to identify main content areas + +```python +result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Remove blocks with fewer words + excluded_tags=['form', 'nav'], # Remove specific HTML tags + remove_overlay_elements=True # Remove popups/modals +) + +# Get clean content +print(result.cleaned_html) # Cleaned HTML +print(result.markdown) # Clean markdown version +``` + +### Fit Markdown: Smart Content Extraction +One of Crawl4AI's most powerful features is `fit_markdown`. This feature uses advanced heuristics to identify and extract the main content from a webpage while excluding irrelevant elements. + +#### How Fit Markdown Works +- Analyzes content density and distribution +- Identifies content patterns and structures +- Removes boilerplate content (headers, footers, sidebars) +- Preserves the most relevant content blocks +- Maintains content hierarchy and formatting + +#### Perfect For: +- Blog posts and articles +- News content +- Documentation pages +- Any page with a clear main content area + +#### Not Recommended For: +- E-commerce product listings +- Search results pages +- Social media feeds +- Pages with multiple equal-weight content sections + +```python +result = await crawler.arun(url="https://example.com") + +# Get the most relevant content +main_content = result.fit_markdown + +# Compare with regular markdown +all_content = result.markdown + +print(f"Fit Markdown Length: {len(main_content)}") +print(f"Regular Markdown Length: {len(all_content)}") +``` + +#### Example Use Case +```python +async def extract_article_content(url: str) -> str: + """Extract main article content from a blog or news site.""" + async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url=url) + + # fit_markdown will focus on the article content, + # excluding navigation, ads, and other distractions + return result.fit_markdown +``` + +## Media Processing + +Crawl4AI provides comprehensive media extraction and analysis capabilities. It automatically detects and processes various types of media elements while maintaining their context and relevance. 
+ +### Image Processing +The library handles various image scenarios, including: +- Regular images +- Lazy-loaded images +- Background images +- Responsive images +- Image metadata and context + +```python +result = await crawler.arun(url="https://example.com") + +for image in result.media["images"]: + # Each image includes rich metadata + print(f"Source: {image['src']}") + print(f"Alt text: {image['alt']}") + print(f"Description: {image['desc']}") + print(f"Context: {image['context']}") # Surrounding text + print(f"Relevance score: {image['score']}") # 0-10 score +``` + +### Handling Lazy-Loaded Content +Crawl4aai already handles lazy loading for media elements. You can also customize the wait time for lazy-loaded content: + +```python +result = await crawler.arun( + url="https://example.com", + wait_for="css:img[data-src]", # Wait for lazy images + delay_before_return_html=2.0 # Additional wait time +) +``` + +### Video and Audio Content +The library extracts video and audio elements with their metadata: + +```python +# Process videos +for video in result.media["videos"]: + print(f"Video source: {video['src']}") + print(f"Type: {video['type']}") + print(f"Duration: {video.get('duration')}") + print(f"Thumbnail: {video.get('poster')}") + +# Process audio +for audio in result.media["audios"]: + print(f"Audio source: {audio['src']}") + print(f"Type: {audio['type']}") + print(f"Duration: {audio.get('duration')}") +``` + +## Link Analysis + +Crawl4AI provides sophisticated link analysis capabilities, helping you understand the relationship between pages and identify important navigation patterns. + +### Link Classification +The library automatically categorizes links into: +- Internal links (same domain) +- External links (different domains) +- Social media links +- Navigation links +- Content links + +```python +result = await crawler.arun(url="https://example.com") + +# Analyze internal links +for link in result.links["internal"]: + print(f"Internal: {link['href']}") + print(f"Link text: {link['text']}") + print(f"Context: {link['context']}") # Surrounding text + print(f"Type: {link['type']}") # nav, content, etc. + +# Analyze external links +for link in result.links["external"]: + print(f"External: {link['href']}") + print(f"Domain: {link['domain']}") + print(f"Type: {link['type']}") +``` + +### Smart Link Filtering +Control which links are included in the results: + +```python +result = await crawler.arun( + url="https://example.com", + exclude_external_links=True, # Remove external links + exclude_social_media_links=True, # Remove social media links + exclude_social_media_domains=[ # Custom social media domains + "facebook.com", "twitter.com", "instagram.com" + ], + exclude_domains=["ads.example.com"] # Exclude specific domains +) +``` + +## Metadata Extraction + +Crawl4AI automatically extracts and processes page metadata, providing valuable information about the content: + +```python +result = await crawler.arun(url="https://example.com") + +metadata = result.metadata +print(f"Title: {metadata['title']}") +print(f"Description: {metadata['description']}") +print(f"Keywords: {metadata['keywords']}") +print(f"Author: {metadata['author']}") +print(f"Published Date: {metadata['published_date']}") +print(f"Modified Date: {metadata['modified_date']}") +print(f"Language: {metadata['language']}") +``` + +## Best Practices + +1. **Use Fit Markdown for Articles** + ```python + # Perfect for blog posts, news articles, documentation + content = result.fit_markdown + ``` + +2. 
**Handle Media Appropriately** + ```python + # Filter by relevance score + relevant_images = [ + img for img in result.media["images"] + if img['score'] > 5 + ] + ``` + +3. **Combine Link Analysis with Content** + ```python + # Get content links with context + content_links = [ + link for link in result.links["internal"] + if link['type'] == 'content' + ] + ``` + +4. **Clean Content with Purpose** + ```python + # Customize cleaning based on your needs + result = await crawler.arun( + url=url, + word_count_threshold=20, # Adjust based on content type + keep_data_attributes=False, # Remove data attributes + process_iframes=True # Include iframe content + ) + ``` \ No newline at end of file diff --git a/docs/md/examples/hooks_auth.md b/docs/md_v2/advanced/hooks-auth.md similarity index 100% rename from docs/md/examples/hooks_auth.md rename to docs/md_v2/advanced/hooks-auth.md diff --git a/docs/md_v2/advanced/magic-mode.md b/docs/md_v2/advanced/magic-mode.md new file mode 100644 index 0000000..16c7229 --- /dev/null +++ b/docs/md_v2/advanced/magic-mode.md @@ -0,0 +1,52 @@ +# Magic Mode & Anti-Bot Protection + +Crawl4AI provides powerful anti-detection capabilities, with Magic Mode being the simplest and most comprehensive solution. + +## Magic Mode + +The easiest way to bypass anti-bot protections: + +```python +async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Enables all anti-detection features + ) +``` + +Magic Mode automatically: +- Masks browser automation signals +- Simulates human-like behavior +- Overrides navigator properties +- Handles cookie consent popups +- Manages browser fingerprinting +- Randomizes timing patterns + +## Manual Anti-Bot Options + +While Magic Mode is recommended, you can also configure individual anti-detection features: + +```python +result = await crawler.arun( + url="https://example.com", + simulate_user=True, # Simulate human behavior + override_navigator=True # Mask automation signals +) +``` + +Note: When `magic=True` is used, you don't need to set these individual options. + +## Example: Handling Protected Sites + +```python +async def crawl_protected_site(url: str): + async with AsyncWebCrawler(headless=True) as crawler: + result = await crawler.arun( + url=url, + magic=True, + remove_overlay_elements=True, # Remove popups/modals + page_timeout=60000 # Increased timeout for protection checks + ) + + return result.markdown if result.success else None +``` diff --git a/docs/md_v2/advanced/managed_browser.md b/docs/md_v2/advanced/managed_browser.md new file mode 100644 index 0000000..80d6fc1 --- /dev/null +++ b/docs/md_v2/advanced/managed_browser.md @@ -0,0 +1,84 @@ +# Content Filtering in Crawl4AI + +This guide explains how to use content filtering strategies in Crawl4AI to extract the most relevant information from crawled web pages. You'll learn how to use the built-in `BM25ContentFilter` and how to create your own custom content filtering strategies. + +## Relevance Content Filter + +The `RelevanceContentFilter` is an abstract class that provides a common interface for content filtering strategies. Specific filtering algorithms, like `BM25ContentFilter`, inherit from this class and implement the `filter_content` method. This method takes the HTML content as input and returns a list of filtered text blocks. + +## BM25 Algorithm + +The `BM25ContentFilter` uses the BM25 algorithm, a ranking function used in information retrieval to estimate the relevance of documents to a given search query. 
In Crawl4AI, this algorithm helps to identify and extract text chunks that are most relevant to the page's metadata or a user-specified query. + +### Usage + +To use the `BM25ContentFilter`, initialize it and then pass it as the `extraction_strategy` parameter to the `arun` method of the crawler. + +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.content_filter_strategy import BM25ContentFilter + +async def filter_content(url, query=None): + async with AsyncWebCrawler() as crawler: + content_filter = BM25ContentFilter(user_query=query) + result = await crawler.arun(url=url, extraction_strategy=content_filter, fit_markdown=True) # Set fit_markdown flag to True to trigger BM25 filtering + if result.success: + print(f"Filtered Content (JSON):\n{result.extracted_content}") + print(f"\nFiltered Markdown:\n{result.fit_markdown}") # New field in CrawlResult object + print(f"\nFiltered HTML:\n{result.fit_html}") # New field in CrawlResult object. Note that raw HTML may have tags re-organized due to internal parsing. + else: + print("Error:", result.error_message) + +# Example usage: +asyncio.run(filter_content("https://en.wikipedia.org/wiki/Apple", "fruit nutrition health")) # with query +asyncio.run(filter_content("https://en.wikipedia.org/wiki/Apple")) # without query, metadata will be used as the query. + +``` + +### Parameters + +- **`user_query`**: (Optional) A string representing the search query. If not provided, the filter extracts relevant metadata (title, description, keywords) from the page and uses that as the query. +- **`bm25_threshold`**: (Optional, default 1.0) A float value that controls the threshold for relevance. Higher values result in stricter filtering, returning only the most relevant text chunks. Lower values result in more lenient filtering. + + +## Fit Markdown Flag + +Setting the `fit_markdown` flag to `True` in the `arun` method activates the BM25 content filtering during the crawl. The `fit_markdown` parameter instructs the scraper to extract and clean the HTML, primarily to prepare for a Large Language Model that cannot process large amounts of data. Setting this flag not only improves the quality of the extracted content but also adds the filtered content to two new attributes in the returned `CrawlResult` object: `fit_markdown` and `fit_html`. + + +## Custom Content Filtering Strategies + +You can create your own custom filtering strategies by inheriting from the `RelevantContentFilter` class and implementing the `filter_content` method. This allows you to tailor the filtering logic to your specific needs. + +```python +from crawl4ai.content_filter_strategy import RelevantContentFilter +from bs4 import BeautifulSoup, Tag +from typing import List + +class MyCustomFilter(RelevantContentFilter): + def filter_content(self, html: str) -> List[str]: + soup = BeautifulSoup(html, 'lxml') + # Implement custom filtering logic here + # Example: extract all paragraphs within divs with class "article-body" + filtered_paragraphs = [] + for tag in soup.select("div.article-body p"): + if isinstance(tag, Tag): + filtered_paragraphs.append(str(tag)) # Add the cleaned HTML element. + return filtered_paragraphs + + + +async def custom_filter_demo(url: str): + async with AsyncWebCrawler() as crawler: + custom_filter = MyCustomFilter() + result = await crawler.arun(url, extraction_strategy=custom_filter) + if result.success: + print(result.extracted_content) + +``` + +This example demonstrates extracting paragraphs from a specific div class. 
You can customize this logic to implement different filtering strategies, use regular expressions, analyze text density, or apply other relevant techniques. + +## Conclusion + +Content filtering strategies provide a powerful way to refine the output of your crawls. By using `BM25ContentFilter` or creating custom strategies, you can focus on the most pertinent information and improve the efficiency of your data processing pipeline. diff --git a/docs/md_v2/advanced/proxy-security.md b/docs/md_v2/advanced/proxy-security.md new file mode 100644 index 0000000..c760253 --- /dev/null +++ b/docs/md_v2/advanced/proxy-security.md @@ -0,0 +1,84 @@ +# Proxy & Security + +Configure proxy settings and enhance security features in Crawl4AI for reliable data extraction. + +## Basic Proxy Setup + +Simple proxy configuration: + +```python +# Using proxy URL +async with AsyncWebCrawler( + proxy="http://proxy.example.com:8080" +) as crawler: + result = await crawler.arun(url="https://example.com") + +# Using SOCKS proxy +async with AsyncWebCrawler( + proxy="socks5://proxy.example.com:1080" +) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Authenticated Proxy + +Use proxy with authentication: + +```python +proxy_config = { + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" +} + +async with AsyncWebCrawler(proxy_config=proxy_config) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Rotating Proxies + +Example using a proxy rotation service: + +```python +async def get_next_proxy(): + # Your proxy rotation logic here + return {"server": "http://next.proxy.com:8080"} + +async with AsyncWebCrawler() as crawler: + # Update proxy for each request + for url in urls: + proxy = await get_next_proxy() + crawler.update_proxy(proxy) + result = await crawler.arun(url=url) +``` + +## Custom Headers + +Add security-related headers: + +```python +headers = { + "X-Forwarded-For": "203.0.113.195", + "Accept-Language": "en-US,en;q=0.9", + "Cache-Control": "no-cache", + "Pragma": "no-cache" +} + +async with AsyncWebCrawler(headers=headers) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Combining with Magic Mode + +For maximum protection, combine proxy with Magic Mode: + +```python +async with AsyncWebCrawler( + proxy="http://proxy.example.com:8080", + headers={"Accept-Language": "en-US"} +) as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Enable all anti-detection features + ) +``` \ No newline at end of file diff --git a/docs/md/full_details/session_based_crawling.md b/docs/md_v2/advanced/session-management-advanced.md similarity index 98% rename from docs/md/full_details/session_based_crawling.md rename to docs/md_v2/advanced/session-management-advanced.md index f8c81da..908828f 100644 --- a/docs/md/full_details/session_based_crawling.md +++ b/docs/md_v2/advanced/session-management-advanced.md @@ -30,7 +30,7 @@ Let's start with a basic example of session-based crawling: ```python import asyncio -from crawl4ai import AsyncWebCrawler +from crawl4ai import AsyncWebCrawler, CacheMode async def basic_session_crawl(): async with AsyncWebCrawler(verbose=True) as crawler: @@ -43,7 +43,7 @@ async def basic_session_crawl(): session_id=session_id, js_code="document.querySelector('.load-more-button').click();" if page > 0 else None, css_selector=".content-item", - bypass_cache=True + cache_mode=CacheMode.BYPASS ) print(f"Page {page + 1}: Found 
{result.extracted_content.count('.content-item')} items") @@ -102,7 +102,7 @@ async def advanced_session_crawl_with_hooks(): session_id=session_id, css_selector="li.commit-item", js_code=js_next_page if page > 0 else None, - bypass_cache=True, + cache_mode=CacheMode.BYPASS, js_only=page > 0 ) @@ -174,7 +174,7 @@ async def integrated_js_and_wait_crawl(): extraction_strategy=extraction_strategy, js_code=js_next_page_and_wait if page > 0 else None, js_only=page > 0, - bypass_cache=True + cache_mode=CacheMode.BYPASS ) commits = json.loads(result.extracted_content) @@ -241,7 +241,7 @@ async def wait_for_parameter_crawl(): js_code=js_next_page if page > 0 else None, wait_for=wait_for if page > 0 else None, js_only=page > 0, - bypass_cache=True + cache_mode=CacheMode.BYPASS ) commits = json.loads(result.extracted_content) diff --git a/docs/md_v2/advanced/session-management.md b/docs/md_v2/advanced/session-management.md new file mode 100644 index 0000000..eae4cf7 --- /dev/null +++ b/docs/md_v2/advanced/session-management.md @@ -0,0 +1,133 @@ +# Session Management + +Session management in Crawl4AI allows you to maintain state across multiple requests and handle complex multi-page crawling tasks, particularly useful for dynamic websites. + +## Basic Session Usage + +Use `session_id` to maintain state between requests: + +```python +async with AsyncWebCrawler() as crawler: + session_id = "my_session" + + # First request + result1 = await crawler.arun( + url="https://example.com/page1", + session_id=session_id + ) + + # Subsequent request using same session + result2 = await crawler.arun( + url="https://example.com/page2", + session_id=session_id + ) + + # Clean up when done + await crawler.crawler_strategy.kill_session(session_id) +``` + +## Dynamic Content with Sessions + +Here's a real-world example of crawling GitHub commits across multiple pages: + +```python +async def crawl_dynamic_content(): + async with AsyncWebCrawler(verbose=True) as crawler: + url = "https://github.com/microsoft/TypeScript/commits/main" + session_id = "typescript_commits_session" + all_commits = [] + + # Define navigation JavaScript + js_next_page = """ + const button = document.querySelector('a[data-testid="pagination-next-button"]'); + if (button) button.click(); + """ + + # Define wait condition + wait_for = """() => { + const commits = document.querySelectorAll('li.Box-sc-g0xbh4-0 h4'); + if (commits.length === 0) return false; + const firstCommit = commits[0].textContent.trim(); + return firstCommit !== window.firstCommit; + }""" + + # Define extraction schema + schema = { + "name": "Commit Extractor", + "baseSelector": "li.Box-sc-g0xbh4-0", + "fields": [ + { + "name": "title", + "selector": "h4.markdown-title", + "type": "text", + "transform": "strip", + }, + ], + } + extraction_strategy = JsonCssExtractionStrategy(schema) + + # Crawl multiple pages + for page in range(3): + result = await crawler.arun( + url=url, + session_id=session_id, + extraction_strategy=extraction_strategy, + js_code=js_next_page if page > 0 else None, + wait_for=wait_for if page > 0 else None, + js_only=page > 0, + cache_mode=CacheMode.BYPASS + ) + + if result.success: + commits = json.loads(result.extracted_content) + all_commits.extend(commits) + print(f"Page {page + 1}: Found {len(commits)} commits") + + # Clean up session + await crawler.crawler_strategy.kill_session(session_id) + return all_commits +``` + +## Session Best Practices + +1. 
**Session Naming**: +```python +# Use descriptive session IDs +session_id = "login_flow_session" +session_id = "product_catalog_session" +``` + +2. **Resource Management**: +```python +try: + # Your crawling code + pass +finally: + # Always clean up sessions + await crawler.crawler_strategy.kill_session(session_id) +``` + +3. **State Management**: +```python +# First page: login +result = await crawler.arun( + url="https://example.com/login", + session_id=session_id, + js_code="document.querySelector('form').submit();" +) + +# Second page: verify login success +result = await crawler.arun( + url="https://example.com/dashboard", + session_id=session_id, + wait_for="css:.user-profile" # Wait for authenticated content +) +``` + +## Common Use Cases + +1. **Authentication Flows** +2. **Pagination Handling** +3. **Form Submissions** +4. **Multi-step Processes** +5. **Dynamic Content Navigation** diff --git a/docs/md_v2/api/arun.md b/docs/md_v2/api/arun.md new file mode 100644 index 0000000..509991e --- /dev/null +++ b/docs/md_v2/api/arun.md @@ -0,0 +1,244 @@ +# Complete Parameter Guide for arun() + +The following parameters can be passed to the `arun()` method. They are organized by their primary usage context and functionality. + +## Core Parameters + +```python +await crawler.arun( + url="https://example.com", # Required: URL to crawl + verbose=True, # Enable detailed logging + cache_mode=CacheMode.ENABLED, # Control cache behavior + warmup=True # Whether to run warmup check +) +``` + +## Cache Control + +```python +from crawl4ai import CacheMode + +await crawler.arun( + cache_mode=CacheMode.ENABLED, # Normal caching (read/write) + # Other cache modes: + # cache_mode=CacheMode.DISABLED # No caching at all + # cache_mode=CacheMode.READ_ONLY # Only read from cache + # cache_mode=CacheMode.WRITE_ONLY # Only write to cache + # cache_mode=CacheMode.BYPASS # Skip cache for this operation +) +``` + +## Content Processing Parameters + +### Text Processing +```python +await crawler.arun( + word_count_threshold=10, # Minimum words per content block + image_description_min_word_threshold=5, # Minimum words for image descriptions + only_text=False, # Extract only text content + excluded_tags=['form', 'nav'], # HTML tags to exclude + keep_data_attributes=False, # Preserve data-* attributes +) +``` + +### Content Selection +```python +await crawler.arun( + css_selector=".main-content", # CSS selector for content extraction + remove_forms=True, # Remove all form elements + remove_overlay_elements=True, # Remove popups/modals/overlays +) +``` + +### Link Handling +```python +await crawler.arun( + exclude_external_links=True, # Remove external links + exclude_social_media_links=True, # Remove social media links + exclude_external_images=True, # Remove external images + exclude_domains=["ads.example.com"], # Specific domains to exclude + social_media_domains=[ # Additional social media domains + "facebook.com", + "twitter.com", + "instagram.com" + ] +) +``` + +## Browser Control Parameters + +### Basic Browser Settings +```python +await crawler.arun( + headless=True, # Run browser in headless mode + browser_type="chromium", # Browser engine: "chromium", "firefox", "webkit" + page_timeout=60000, # Page load timeout in milliseconds + user_agent="custom-agent", # Custom user agent +) +``` + +### Navigation and Waiting +```python +await crawler.arun( + wait_for="css:.dynamic-content", # Wait for element/condition + delay_before_return_html=2.0, # Wait before returning HTML (seconds) +) +``` + +### JavaScript 
Execution +```python +await crawler.arun( + js_code=[ # JavaScript to execute (string or list) + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ], + js_only=False, # Only execute JavaScript without reloading page +) +``` + +### Anti-Bot Features +```python +await crawler.arun( + magic=True, # Enable all anti-detection features + simulate_user=True, # Simulate human behavior + override_navigator=True # Override navigator properties +) +``` + +### Session Management +```python +await crawler.arun( + session_id="my_session", # Session identifier for persistent browsing +) +``` + +### Screenshot Options +```python +await crawler.arun( + screenshot=True, # Take page screenshot + screenshot_wait_for=2.0, # Wait before screenshot (seconds) +) +``` + +### Proxy Configuration +```python +await crawler.arun( + proxy="http://proxy.example.com:8080", # Simple proxy URL + proxy_config={ # Advanced proxy settings + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" + } +) +``` + +## Content Extraction Parameters + +### Extraction Strategy +```python +await crawler.arun( + extraction_strategy=LLMExtractionStrategy( + provider="ollama/llama2", + schema=MySchema.schema(), + instruction="Extract specific data" + ) +) +``` + +### Chunking Strategy +```python +await crawler.arun( + chunking_strategy=RegexChunking( + patterns=[r'\n\n', r'\.\s+'] + ) +) +``` + +### HTML to Text Options +```python +await crawler.arun( + html2text={ + "ignore_links": False, + "ignore_images": False, + "escape_dot": False, + "body_width": 0, + "protect_links": True, + "unicode_snob": True + } +) +``` + +## Debug Options +```python +await crawler.arun( + log_console=True, # Log browser console messages +) +``` + +## Parameter Interactions and Notes + +1. **Cache and Performance Setup** + ```python + # Optimal caching for repeated crawls + await crawler.arun( + cache_mode=CacheMode.ENABLED, + word_count_threshold=10, + process_iframes=False + ) + ``` + +2. **Dynamic Content Handling** + ```python + # Handle lazy-loaded content + await crawler.arun( + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.lazy-content", + delay_before_return_html=2.0, + cache_mode=CacheMode.WRITE_ONLY # Cache results after dynamic load + ) + ``` + +3. **Content Extraction Pipeline** + ```python + # Complete extraction setup + await crawler.arun( + css_selector=".main-content", + word_count_threshold=20, + extraction_strategy=my_strategy, + chunking_strategy=my_chunking, + process_iframes=True, + remove_overlay_elements=True, + cache_mode=CacheMode.ENABLED + ) + ``` + +## Best Practices + +1. **Performance Optimization** + ```python + await crawler.arun( + cache_mode=CacheMode.ENABLED, # Use full caching + word_count_threshold=10, # Filter out noise + process_iframes=False # Skip iframes if not needed + ) + ``` + +2. **Reliable Scraping** + ```python + await crawler.arun( + magic=True, # Enable anti-detection + delay_before_return_html=1.0, # Wait for dynamic content + page_timeout=60000, # Longer timeout for slow pages + cache_mode=CacheMode.WRITE_ONLY # Cache results after successful crawl + ) + ``` + +3. 
**Clean Content** + ```python + await crawler.arun( + remove_overlay_elements=True, # Remove popups + excluded_tags=['nav', 'aside'],# Remove unnecessary elements + keep_data_attributes=False, # Remove data attributes + cache_mode=CacheMode.ENABLED # Use cache for faster processing + ) + ``` \ No newline at end of file diff --git a/docs/md_v2/api/async-webcrawler.md b/docs/md_v2/api/async-webcrawler.md new file mode 100644 index 0000000..be95610 --- /dev/null +++ b/docs/md_v2/api/async-webcrawler.md @@ -0,0 +1,320 @@ +# AsyncWebCrawler + +The `AsyncWebCrawler` class is the main interface for web crawling operations. It provides asynchronous web crawling capabilities with extensive configuration options. + +## Constructor + +```python +AsyncWebCrawler( + # Browser Settings + browser_type: str = "chromium", # Options: "chromium", "firefox", "webkit" + headless: bool = True, # Run browser in headless mode + verbose: bool = False, # Enable verbose logging + + # Cache Settings + always_by_pass_cache: bool = False, # Always bypass cache + base_directory: str = str(os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home())), # Base directory for cache + + # Network Settings + proxy: str = None, # Simple proxy URL + proxy_config: Dict = None, # Advanced proxy configuration + + # Browser Behavior + sleep_on_close: bool = False, # Wait before closing browser + + # Custom Settings + user_agent: str = None, # Custom user agent + headers: Dict[str, str] = {}, # Custom HTTP headers + js_code: Union[str, List[str]] = None, # Default JavaScript to execute +) +``` + +### Parameters in Detail + +#### Browser Settings + +- **browser_type** (str, optional) + - Default: `"chromium"` + - Options: `"chromium"`, `"firefox"`, `"webkit"` + - Controls which browser engine to use + ```python + # Example: Using Firefox + crawler = AsyncWebCrawler(browser_type="firefox") + ``` + +- **headless** (bool, optional) + - Default: `True` + - When `True`, browser runs without GUI + - Set to `False` for debugging + ```python + # Visible browser for debugging + crawler = AsyncWebCrawler(headless=False) + ``` + +- **verbose** (bool, optional) + - Default: `False` + - Enables detailed logging + ```python + # Enable detailed logging + crawler = AsyncWebCrawler(verbose=True) + ``` + +#### Cache Settings + +- **always_by_pass_cache** (bool, optional) + - Default: `False` + - When `True`, always fetches fresh content + ```python + # Always fetch fresh content + crawler = AsyncWebCrawler(always_by_pass_cache=True) + ``` + +- **base_directory** (str, optional) + - Default: User's home directory + - Base path for cache storage + ```python + # Custom cache directory + crawler = AsyncWebCrawler(base_directory="/path/to/cache") + ``` + +#### Network Settings + +- **proxy** (str, optional) + - Simple proxy URL + ```python + # Using simple proxy + crawler = AsyncWebCrawler(proxy="http://proxy.example.com:8080") + ``` + +- **proxy_config** (Dict, optional) + - Advanced proxy configuration with authentication + ```python + # Advanced proxy with auth + crawler = AsyncWebCrawler(proxy_config={ + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" + }) + ``` + +#### Browser Behavior + +- **sleep_on_close** (bool, optional) + - Default: `False` + - Adds delay before closing browser + ```python + # Wait before closing + crawler = AsyncWebCrawler(sleep_on_close=True) + ``` + +#### Custom Settings + +- **user_agent** (str, optional) + - Custom user agent string + ```python + # Custom user agent + crawler = AsyncWebCrawler( + 
user_agent="Mozilla/5.0 (Custom Agent) Chrome/90.0" + ) + ``` + +- **headers** (Dict[str, str], optional) + - Custom HTTP headers + ```python + # Custom headers + crawler = AsyncWebCrawler( + headers={ + "Accept-Language": "en-US", + "Custom-Header": "Value" + } + ) + ``` + +- **js_code** (Union[str, List[str]], optional) + - Default JavaScript to execute on each page + ```python + # Default JavaScript + crawler = AsyncWebCrawler( + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ] + ) + ``` + +## Methods + +### arun() + +The primary method for crawling web pages. + +```python +async def arun( + # Required + url: str, # URL to crawl + + # Content Selection + css_selector: str = None, # CSS selector for content + word_count_threshold: int = 10, # Minimum words per block + + # Cache Control + bypass_cache: bool = False, # Bypass cache for this request + + # Session Management + session_id: str = None, # Session identifier + + # Screenshot Options + screenshot: bool = False, # Take screenshot + screenshot_wait_for: float = None, # Wait before screenshot + + # Content Processing + process_iframes: bool = False, # Process iframe content + remove_overlay_elements: bool = False, # Remove popups/modals + + # Anti-Bot Settings + simulate_user: bool = False, # Simulate human behavior + override_navigator: bool = False, # Override navigator properties + magic: bool = False, # Enable all anti-detection + + # Content Filtering + excluded_tags: List[str] = None, # HTML tags to exclude + exclude_external_links: bool = False, # Remove external links + exclude_social_media_links: bool = False, # Remove social media links + + # JavaScript Handling + js_code: Union[str, List[str]] = None, # JavaScript to execute + wait_for: str = None, # Wait condition + + # Page Loading + page_timeout: int = 60000, # Page load timeout (ms) + delay_before_return_html: float = None, # Wait before return + + # Extraction + extraction_strategy: ExtractionStrategy = None # Extraction strategy +) -> CrawlResult: +``` + +### Usage Examples + +#### Basic Crawling +```python +async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url="https://example.com") +``` + +#### Advanced Crawling +```python +async with AsyncWebCrawler( + browser_type="firefox", + verbose=True, + headers={"Custom-Header": "Value"} +) as crawler: + result = await crawler.arun( + url="https://example.com", + css_selector=".main-content", + word_count_threshold=20, + process_iframes=True, + magic=True, + wait_for="css:.dynamic-content", + screenshot=True + ) +``` + +#### Session Management +```python +async with AsyncWebCrawler() as crawler: + # First request + result1 = await crawler.arun( + url="https://example.com/login", + session_id="my_session" + ) + + # Subsequent request using same session + result2 = await crawler.arun( + url="https://example.com/protected", + session_id="my_session" + ) +``` + +## Context Manager + +AsyncWebCrawler implements the async context manager protocol: + +```python +async def __aenter__(self) -> 'AsyncWebCrawler': + # Initialize browser and resources + return self + +async def __aexit__(self, *args): + # Cleanup resources + pass +``` + +Always use AsyncWebCrawler with async context manager: +```python +async with AsyncWebCrawler() as crawler: + # Your crawling code here + pass +``` + +## Best Practices + +1. 
**Resource Management** +```python +# Always use context manager +async with AsyncWebCrawler() as crawler: + # Crawler will be properly cleaned up + pass +``` + +2. **Error Handling** +```python +try: + async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url="https://example.com") + if not result.success: + print(f"Crawl failed: {result.error_message}") +except Exception as e: + print(f"Error: {str(e)}") +``` + +3. **Performance Optimization** +```python +# Enable caching for better performance +crawler = AsyncWebCrawler( + always_by_pass_cache=False, + verbose=True +) +``` + +4. **Anti-Detection** +```python +# Maximum stealth +crawler = AsyncWebCrawler( + headless=True, + user_agent="Mozilla/5.0...", + headers={"Accept-Language": "en-US"} +) +result = await crawler.arun( + url="https://example.com", + magic=True, + simulate_user=True +) +``` + +## Note on Browser Types + +Each browser type has its characteristics: + +- **chromium**: Best overall compatibility +- **firefox**: Good for specific use cases +- **webkit**: Lighter weight, good for basic crawling + +Choose based on your specific needs: +```python +# High compatibility +crawler = AsyncWebCrawler(browser_type="chromium") + +# Memory efficient +crawler = AsyncWebCrawler(browser_type="webkit") +``` \ No newline at end of file diff --git a/docs/md_v2/api/crawl-result.md b/docs/md_v2/api/crawl-result.md new file mode 100644 index 0000000..7e3bda9 --- /dev/null +++ b/docs/md_v2/api/crawl-result.md @@ -0,0 +1,302 @@ +# CrawlResult + +The `CrawlResult` class represents the result of a web crawling operation. It provides access to various forms of extracted content and metadata from the crawled webpage. + +## Class Definition + +```python +class CrawlResult(BaseModel): + """Result of a web crawling operation.""" + + # Basic Information + url: str # Crawled URL + success: bool # Whether crawl succeeded + status_code: Optional[int] = None # HTTP status code + error_message: Optional[str] = None # Error message if failed + + # Content + html: str # Raw HTML content + cleaned_html: Optional[str] = None # Cleaned HTML + fit_html: Optional[str] = None # Most relevant HTML content + markdown: Optional[str] = None # HTML converted to markdown + fit_markdown: Optional[str] = None # Most relevant markdown content + downloaded_files: Optional[List[str]] = None # Downloaded files + + # Extracted Data + extracted_content: Optional[str] = None # Content from extraction strategy + media: Dict[str, List[Dict]] = {} # Extracted media information + links: Dict[str, List[Dict]] = {} # Extracted links + metadata: Optional[dict] = None # Page metadata + + # Additional Data + screenshot: Optional[str] = None # Base64 encoded screenshot + session_id: Optional[str] = None # Session identifier + response_headers: Optional[dict] = None # HTTP response headers +``` + +## Properties and Their Data Structures + +### Basic Information + +```python +# Access basic information +result = await crawler.arun(url="https://example.com") + +print(result.url) # "https://example.com" +print(result.success) # True/False +print(result.status_code) # 200, 404, etc. +print(result.error_message) # Error details if failed +``` + +### Content Properties + +#### HTML Content +```python +# Raw HTML +html_content = result.html + +# Cleaned HTML (removed ads, popups, etc.) 
+clean_content = result.cleaned_html + +# Most relevant HTML content +main_content = result.fit_html +``` + +#### Markdown Content +```python +# Full markdown version +markdown_content = result.markdown + +# Most relevant markdown content +main_content = result.fit_markdown +``` + +### Media Content + +The media dictionary contains organized media elements: + +```python +# Structure +media = { + "images": [ + { + "src": str, # Image URL + "alt": str, # Alt text + "desc": str, # Contextual description + "score": float, # Relevance score (0-10) + "type": str, # "image" + "width": int, # Image width (if available) + "height": int, # Image height (if available) + "context": str, # Surrounding text + "lazy": bool # Whether image was lazy-loaded + } + ], + "videos": [ + { + "src": str, # Video URL + "type": str, # "video" + "title": str, # Video title + "poster": str, # Thumbnail URL + "duration": str, # Video duration + "description": str # Video description + } + ], + "audios": [ + { + "src": str, # Audio URL + "type": str, # "audio" + "title": str, # Audio title + "duration": str, # Audio duration + "description": str # Audio description + } + ] +} + +# Example usage +for image in result.media["images"]: + if image["score"] > 5: # High-relevance images + print(f"High-quality image: {image['src']}") + print(f"Context: {image['context']}") +``` + +### Link Analysis + +The links dictionary organizes discovered links: + +```python +# Structure +links = { + "internal": [ + { + "href": str, # URL + "text": str, # Link text + "title": str, # Title attribute + "type": str, # Link type (nav, content, etc.) + "context": str, # Surrounding text + "score": float # Relevance score + } + ], + "external": [ + { + "href": str, # External URL + "text": str, # Link text + "title": str, # Title attribute + "domain": str, # Domain name + "type": str, # Link type + "context": str # Surrounding text + } + ] +} + +# Example usage +for link in result.links["internal"]: + print(f"Internal link: {link['href']}") + print(f"Context: {link['context']}") +``` + +### Metadata + +The metadata dictionary contains page information: + +```python +# Structure +metadata = { + "title": str, # Page title + "description": str, # Meta description + "keywords": List[str], # Meta keywords + "author": str, # Author information + "published_date": str, # Publication date + "modified_date": str, # Last modified date + "language": str, # Page language + "canonical_url": str, # Canonical URL + "og_data": Dict, # Open Graph data + "twitter_data": Dict # Twitter card data +} + +# Example usage +if result.metadata: + print(f"Title: {result.metadata['title']}") + print(f"Author: {result.metadata.get('author', 'Unknown')}") +``` + +### Extracted Content + +Content from extraction strategies: + +```python +# For LLM or CSS extraction strategies +if result.extracted_content: + structured_data = json.loads(result.extracted_content) + print(structured_data) +``` + +### Screenshot + +Base64 encoded screenshot: + +```python +# Save screenshot if available +if result.screenshot: + import base64 + + # Decode and save + with open("screenshot.png", "wb") as f: + f.write(base64.b64decode(result.screenshot)) +``` + +## Usage Examples + +### Basic Content Access +```python +async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url="https://example.com") + + if result.success: + # Get clean content + print(result.fit_markdown) + + # Process images + for image in result.media["images"]: + if image["score"] > 7: + print(f"High-quality image: 
{image['src']}") +``` + +### Complete Data Processing +```python +async def process_webpage(url: str) -> Dict: + async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url=url) + + if not result.success: + raise Exception(f"Crawl failed: {result.error_message}") + + return { + "content": result.fit_markdown, + "images": [ + img for img in result.media["images"] + if img["score"] > 5 + ], + "internal_links": [ + link["href"] for link in result.links["internal"] + ], + "metadata": result.metadata, + "status": result.status_code + } +``` + +### Error Handling +```python +async def safe_crawl(url: str) -> Dict: + async with AsyncWebCrawler() as crawler: + try: + result = await crawler.arun(url=url) + + if not result.success: + return { + "success": False, + "error": result.error_message, + "status": result.status_code + } + + return { + "success": True, + "content": result.fit_markdown, + "status": result.status_code + } + + except Exception as e: + return { + "success": False, + "error": str(e), + "status": None + } +``` + +## Best Practices + +1. **Always Check Success** +```python +if not result.success: + print(f"Error: {result.error_message}") + return +``` + +2. **Use fit_markdown for Articles** +```python +# Better for article content +content = result.fit_markdown if result.fit_markdown else result.markdown +``` + +3. **Filter Media by Score** +```python +relevant_images = [ + img for img in result.media["images"] + if img["score"] > 5 +] +``` + +4. **Handle Missing Data** +```python +metadata = result.metadata or {} +title = metadata.get('title', 'Unknown Title') +``` \ No newline at end of file diff --git a/docs/md_v2/api/parameters.md b/docs/md_v2/api/parameters.md new file mode 100644 index 0000000..c1c4d2e --- /dev/null +++ b/docs/md_v2/api/parameters.md @@ -0,0 +1,36 @@ +# Parameter Reference Table + +| File Name | Parameter Name | Code Usage | Strategy/Class | Description | +|-----------|---------------|------------|----------------|-------------| +| async_crawler_strategy.py | user_agent | `kwargs.get("user_agent")` | AsyncPlaywrightCrawlerStrategy | User agent string for browser identification | +| async_crawler_strategy.py | proxy | `kwargs.get("proxy")` | AsyncPlaywrightCrawlerStrategy | Proxy server configuration for network requests | +| async_crawler_strategy.py | proxy_config | `kwargs.get("proxy_config")` | AsyncPlaywrightCrawlerStrategy | Detailed proxy configuration including auth | +| async_crawler_strategy.py | headless | `kwargs.get("headless", True)` | AsyncPlaywrightCrawlerStrategy | Whether to run browser in headless mode | +| async_crawler_strategy.py | browser_type | `kwargs.get("browser_type", "chromium")` | AsyncPlaywrightCrawlerStrategy | Type of browser to use (chromium/firefox/webkit) | +| async_crawler_strategy.py | headers | `kwargs.get("headers", {})` | AsyncPlaywrightCrawlerStrategy | Custom HTTP headers for requests | +| async_crawler_strategy.py | verbose | `kwargs.get("verbose", False)` | AsyncPlaywrightCrawlerStrategy | Enable detailed logging output | +| async_crawler_strategy.py | sleep_on_close | `kwargs.get("sleep_on_close", False)` | AsyncPlaywrightCrawlerStrategy | Add delay before closing browser | +| async_crawler_strategy.py | use_managed_browser | `kwargs.get("use_managed_browser", False)` | AsyncPlaywrightCrawlerStrategy | Use managed browser instance | +| async_crawler_strategy.py | user_data_dir | `kwargs.get("user_data_dir", None)` | AsyncPlaywrightCrawlerStrategy | Custom directory for browser profile data | +| 
async_crawler_strategy.py | session_id | `kwargs.get("session_id")` | AsyncPlaywrightCrawlerStrategy | Unique identifier for browser session | +| async_crawler_strategy.py | override_navigator | `kwargs.get("override_navigator", False)` | AsyncPlaywrightCrawlerStrategy | Override browser navigator properties | +| async_crawler_strategy.py | simulate_user | `kwargs.get("simulate_user", False)` | AsyncPlaywrightCrawlerStrategy | Simulate human-like behavior | +| async_crawler_strategy.py | magic | `kwargs.get("magic", False)` | AsyncPlaywrightCrawlerStrategy | Enable advanced anti-detection features | +| async_crawler_strategy.py | log_console | `kwargs.get("log_console", False)` | AsyncPlaywrightCrawlerStrategy | Log browser console messages | +| async_crawler_strategy.py | js_only | `kwargs.get("js_only", False)` | AsyncPlaywrightCrawlerStrategy | Only execute JavaScript without page load | +| async_crawler_strategy.py | page_timeout | `kwargs.get("page_timeout", 60000)` | AsyncPlaywrightCrawlerStrategy | Timeout for page load in milliseconds | +| async_crawler_strategy.py | ignore_body_visibility | `kwargs.get("ignore_body_visibility", True)` | AsyncPlaywrightCrawlerStrategy | Process page even if body is hidden | +| async_crawler_strategy.py | js_code | `kwargs.get("js_code", kwargs.get("js", self.js_code))` | AsyncPlaywrightCrawlerStrategy | Custom JavaScript code to execute | +| async_crawler_strategy.py | wait_for | `kwargs.get("wait_for")` | AsyncPlaywrightCrawlerStrategy | Wait for specific element/condition | +| async_crawler_strategy.py | process_iframes | `kwargs.get("process_iframes", False)` | AsyncPlaywrightCrawlerStrategy | Extract content from iframes | +| async_crawler_strategy.py | delay_before_return_html | `kwargs.get("delay_before_return_html")` | AsyncPlaywrightCrawlerStrategy | Additional delay before returning HTML | +| async_crawler_strategy.py | remove_overlay_elements | `kwargs.get("remove_overlay_elements", False)` | AsyncPlaywrightCrawlerStrategy | Remove pop-ups and overlay elements | +| async_crawler_strategy.py | screenshot | `kwargs.get("screenshot")` | AsyncPlaywrightCrawlerStrategy | Take page screenshot | +| async_crawler_strategy.py | screenshot_wait_for | `kwargs.get("screenshot_wait_for")` | AsyncPlaywrightCrawlerStrategy | Wait before taking screenshot | +| async_crawler_strategy.py | semaphore_count | `kwargs.get("semaphore_count", 5)` | AsyncPlaywrightCrawlerStrategy | Concurrent request limit | +| async_webcrawler.py | verbose | `kwargs.get("verbose", False)` | AsyncWebCrawler | Enable detailed logging | +| async_webcrawler.py | warmup | `kwargs.get("warmup", True)` | AsyncWebCrawler | Initialize crawler with warmup request | +| async_webcrawler.py | session_id | `kwargs.get("session_id", None)` | AsyncWebCrawler | Session identifier for browser reuse | +| async_webcrawler.py | only_text | `kwargs.get("only_text", False)` | AsyncWebCrawler | Extract only text content | +| async_webcrawler.py | bypass_cache | `kwargs.get("bypass_cache", False)` | AsyncWebCrawler | Skip cache and force fresh crawl | +| async_webcrawler.py | cache_mode | `kwargs.get("cache_mode", CacheMode.ENABLE)` | AsyncWebCrawler | Cache handling mode for request | \ No newline at end of file diff --git a/docs/md_v2/api/strategies.md b/docs/md_v2/api/strategies.md new file mode 100644 index 0000000..f0f8f57 --- /dev/null +++ b/docs/md_v2/api/strategies.md @@ -0,0 +1,255 @@ +# Extraction & Chunking Strategies API + +This documentation covers the API reference for extraction and 
chunking strategies in Crawl4AI. + +## Extraction Strategies + +All extraction strategies inherit from the base `ExtractionStrategy` class and implement two key methods: +- `extract(url: str, html: str) -> List[Dict[str, Any]]` +- `run(url: str, sections: List[str]) -> List[Dict[str, Any]]` + +### LLMExtractionStrategy + +Used for extracting structured data using Language Models. + +```python +LLMExtractionStrategy( + # Required Parameters + provider: str = DEFAULT_PROVIDER, # LLM provider (e.g., "ollama/llama2") + api_token: Optional[str] = None, # API token + + # Extraction Configuration + instruction: str = None, # Custom extraction instruction + schema: Dict = None, # Pydantic model schema for structured data + extraction_type: str = "block", # "block" or "schema" + + # Chunking Parameters + chunk_token_threshold: int = 4000, # Maximum tokens per chunk + overlap_rate: float = 0.1, # Overlap between chunks + word_token_rate: float = 0.75, # Word to token conversion rate + apply_chunking: bool = True, # Enable/disable chunking + + # API Configuration + base_url: str = None, # Base URL for API + extra_args: Dict = {}, # Additional provider arguments + verbose: bool = False # Enable verbose logging +) +``` + +### CosineStrategy + +Used for content similarity-based extraction and clustering. + +```python +CosineStrategy( + # Content Filtering + semantic_filter: str = None, # Topic/keyword filter + word_count_threshold: int = 10, # Minimum words per cluster + sim_threshold: float = 0.3, # Similarity threshold + + # Clustering Parameters + max_dist: float = 0.2, # Maximum cluster distance + linkage_method: str = 'ward', # Clustering method + top_k: int = 3, # Top clusters to return + + # Model Configuration + model_name: str = 'sentence-transformers/all-MiniLM-L6-v2', # Embedding model + + verbose: bool = False # Enable verbose logging +) +``` + +### JsonCssExtractionStrategy + +Used for CSS selector-based structured data extraction. + +```python +JsonCssExtractionStrategy( + schema: Dict[str, Any], # Extraction schema + verbose: bool = False # Enable verbose logging +) + +# Schema Structure +schema = { + "name": str, # Schema name + "baseSelector": str, # Base CSS selector + "fields": [ # List of fields to extract + { + "name": str, # Field name + "selector": str, # CSS selector + "type": str, # Field type: "text", "attribute", "html", "regex" + "attribute": str, # For type="attribute" + "pattern": str, # For type="regex" + "transform": str, # Optional: "lowercase", "uppercase", "strip" + "default": Any # Default value if extraction fails + } + ] +} +``` + +## Chunking Strategies + +All chunking strategies inherit from `ChunkingStrategy` and implement the `chunk(text: str) -> list` method. + +### RegexChunking + +Splits text based on regex patterns. + +```python +RegexChunking( + patterns: List[str] = None # Regex patterns for splitting + # Default: [r'\n\n'] +) +``` + +### SlidingWindowChunking + +Creates overlapping chunks with a sliding window approach. + +```python +SlidingWindowChunking( + window_size: int = 100, # Window size in words + step: int = 50 # Step size between windows +) +``` + +### OverlappingWindowChunking + +Creates chunks with specified overlap. 
+ +```python +OverlappingWindowChunking( + window_size: int = 1000, # Chunk size in words + overlap: int = 100 # Overlap size in words +) +``` + +## Usage Examples + +### LLM Extraction + +```python +from pydantic import BaseModel +from crawl4ai.extraction_strategy import LLMExtractionStrategy + +# Define schema +class Article(BaseModel): + title: str + content: str + author: str + +# Create strategy +strategy = LLMExtractionStrategy( + provider="ollama/llama2", + schema=Article.schema(), + instruction="Extract article details" +) + +# Use with crawler +result = await crawler.arun( + url="https://example.com/article", + extraction_strategy=strategy +) + +# Access extracted data +data = json.loads(result.extracted_content) +``` + +### CSS Extraction + +```python +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy + +# Define schema +schema = { + "name": "Product List", + "baseSelector": ".product-card", + "fields": [ + { + "name": "title", + "selector": "h2.title", + "type": "text" + }, + { + "name": "price", + "selector": ".price", + "type": "text", + "transform": "strip" + }, + { + "name": "image", + "selector": "img", + "type": "attribute", + "attribute": "src" + } + ] +} + +# Create and use strategy +strategy = JsonCssExtractionStrategy(schema) +result = await crawler.arun( + url="https://example.com/products", + extraction_strategy=strategy +) +``` + +### Content Chunking + +```python +from crawl4ai.chunking_strategy import OverlappingWindowChunking + +# Create chunking strategy +chunker = OverlappingWindowChunking( + window_size=500, # 500 words per chunk + overlap=50 # 50 words overlap +) + +# Use with extraction strategy +strategy = LLMExtractionStrategy( + provider="ollama/llama2", + chunking_strategy=chunker +) + +result = await crawler.arun( + url="https://example.com/long-article", + extraction_strategy=strategy +) +``` + +## Best Practices + +1. **Choose the Right Strategy** + - Use `LLMExtractionStrategy` for complex, unstructured content + - Use `JsonCssExtractionStrategy` for well-structured HTML + - Use `CosineStrategy` for content similarity and clustering + +2. **Optimize Chunking** + ```python + # For long documents + strategy = LLMExtractionStrategy( + chunk_token_threshold=2000, # Smaller chunks + overlap_rate=0.1 # 10% overlap + ) + ``` + +3. **Handle Errors** + ```python + try: + result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy + ) + if result.success: + content = json.loads(result.extracted_content) + except Exception as e: + print(f"Extraction failed: {e}") + ``` + +4. 
**Monitor Performance** + ```python + strategy = CosineStrategy( + verbose=True, # Enable logging + word_count_threshold=20, # Filter short content + top_k=5 # Limit results + ) + ``` \ No newline at end of file diff --git a/docs/md _sync/assets/DankMono-Bold.woff2 b/docs/md_v2/assets/DankMono-Bold.woff2 similarity index 100% rename from docs/md _sync/assets/DankMono-Bold.woff2 rename to docs/md_v2/assets/DankMono-Bold.woff2 diff --git a/docs/md _sync/assets/DankMono-Italic.woff2 b/docs/md_v2/assets/DankMono-Italic.woff2 similarity index 100% rename from docs/md _sync/assets/DankMono-Italic.woff2 rename to docs/md_v2/assets/DankMono-Italic.woff2 diff --git a/docs/md _sync/assets/DankMono-Regular.woff2 b/docs/md_v2/assets/DankMono-Regular.woff2 similarity index 100% rename from docs/md _sync/assets/DankMono-Regular.woff2 rename to docs/md_v2/assets/DankMono-Regular.woff2 diff --git a/docs/md _sync/assets/Monaco.woff b/docs/md_v2/assets/Monaco.woff similarity index 100% rename from docs/md _sync/assets/Monaco.woff rename to docs/md_v2/assets/Monaco.woff diff --git a/docs/md _sync/assets/dmvendor.css b/docs/md_v2/assets/dmvendor.css similarity index 100% rename from docs/md _sync/assets/dmvendor.css rename to docs/md_v2/assets/dmvendor.css diff --git a/docs/md_v2/assets/docs.zip b/docs/md_v2/assets/docs.zip new file mode 100644 index 0000000..6b28c0a Binary files /dev/null and b/docs/md_v2/assets/docs.zip differ diff --git a/docs/md _sync/assets/highlight.css b/docs/md_v2/assets/highlight.css similarity index 100% rename from docs/md _sync/assets/highlight.css rename to docs/md_v2/assets/highlight.css diff --git a/docs/md _sync/assets/highlight.min.js b/docs/md_v2/assets/highlight.min.js similarity index 100% rename from docs/md _sync/assets/highlight.min.js rename to docs/md_v2/assets/highlight.min.js diff --git a/docs/md _sync/assets/highlight_init.js b/docs/md_v2/assets/highlight_init.js similarity index 100% rename from docs/md _sync/assets/highlight_init.js rename to docs/md_v2/assets/highlight_init.js diff --git a/docs/md _sync/assets/styles.css b/docs/md_v2/assets/styles.css similarity index 96% rename from docs/md _sync/assets/styles.css rename to docs/md_v2/assets/styles.css index f103474..68a93f5 100644 --- a/docs/md _sync/assets/styles.css +++ b/docs/md_v2/assets/styles.css @@ -150,4 +150,11 @@ strong, .tab-content pre { margin: 0; max-height: 300px; overflow: auto; border:none; +} + +ol li::before { + content: counters(item, ".") ". "; + counter-increment: item; + /* float: left; */ + /* padding-right: 5px; */ } \ No newline at end of file diff --git a/docs/md_v2/basic/browser-config.md b/docs/md_v2/basic/browser-config.md new file mode 100644 index 0000000..7df4a97 --- /dev/null +++ b/docs/md_v2/basic/browser-config.md @@ -0,0 +1,208 @@ +# Browser Configuration + +Crawl4AI supports multiple browser engines and offers extensive configuration options for browser behavior. 
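+
+As a quick orientation before the individual options below: browser-level settings (engine, headless mode, identity) are passed to the `AsyncWebCrawler` constructor, while per-request behavior (waiting, JavaScript, screenshots) is passed to `arun()`. A minimal sketch, using only parameters shown in this guide:
+
+```python
+from crawl4ai import AsyncWebCrawler
+
+async def quick_crawl(url: str):
+    # Browser-level configuration goes on the crawler itself
+    async with AsyncWebCrawler(browser_type="chromium", headless=True, verbose=True) as crawler:
+        # Per-request options go on arun()
+        return await crawler.arun(url=url, page_timeout=60000)
+```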
+ +## Browser Types + +Choose from three browser engines: + +```python +# Chromium (default) +async with AsyncWebCrawler(browser_type="chromium") as crawler: + result = await crawler.arun(url="https://example.com") + +# Firefox +async with AsyncWebCrawler(browser_type="firefox") as crawler: + result = await crawler.arun(url="https://example.com") + +# WebKit +async with AsyncWebCrawler(browser_type="webkit") as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Basic Configuration + +Common browser settings: + +```python +async with AsyncWebCrawler( + headless=True, # Run in headless mode (no GUI) + verbose=True, # Enable detailed logging + sleep_on_close=False # No delay when closing browser +) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Identity Management + +Control how your crawler appears to websites: + +```python +# Custom user agent +async with AsyncWebCrawler( + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" +) as crawler: + result = await crawler.arun(url="https://example.com") + +# Custom headers +headers = { + "Accept-Language": "en-US,en;q=0.9", + "Cache-Control": "no-cache" +} +async with AsyncWebCrawler(headers=headers) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Screenshot Capabilities + +Capture page screenshots with enhanced error handling: + +```python +result = await crawler.arun( + url="https://example.com", + screenshot=True, # Enable screenshot + screenshot_wait_for=2.0 # Wait 2 seconds before capture +) + +if result.screenshot: # Base64 encoded image + import base64 + with open("screenshot.png", "wb") as f: + f.write(base64.b64decode(result.screenshot)) +``` + +## Timeouts and Waiting + +Control page loading behavior: + +```python +result = await crawler.arun( + url="https://example.com", + page_timeout=60000, # Page load timeout (ms) + delay_before_return_html=2.0, # Wait before content capture + wait_for="css:.dynamic-content" # Wait for specific element +) +``` + +## JavaScript Execution + +Execute custom JavaScript before crawling: + +```python +# Single JavaScript command +result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);" +) + +# Multiple commands +js_commands = [ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" +] +result = await crawler.arun( + url="https://example.com", + js_code=js_commands +) +``` + +## Proxy Configuration + +Use proxies for enhanced access: + +```python +# Simple proxy +async with AsyncWebCrawler( + proxy="http://proxy.example.com:8080" +) as crawler: + result = await crawler.arun(url="https://example.com") + +# Proxy with authentication +proxy_config = { + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" +} +async with AsyncWebCrawler(proxy_config=proxy_config) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Anti-Detection Features + +Enable stealth features to avoid bot detection: + +```python +result = await crawler.arun( + url="https://example.com", + simulate_user=True, # Simulate human behavior + override_navigator=True, # Mask automation signals + magic=True # Enable all anti-detection features +) +``` + +## Handling Dynamic Content + +Configure browser to handle dynamic content: + +```python +# Wait for dynamic content +result = await crawler.arun( + url="https://example.com", + wait_for="js:() => 
document.querySelector('.content').children.length > 10", + process_iframes=True # Process iframe content +) + +# Handle lazy-loaded images +result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);", + delay_before_return_html=2.0 # Wait for images to load +) +``` + +## Comprehensive Example + +Here's how to combine various browser configurations: + +```python +async def crawl_with_advanced_config(url: str): + async with AsyncWebCrawler( + # Browser setup + browser_type="chromium", + headless=True, + verbose=True, + + # Identity + user_agent="Custom User Agent", + headers={"Accept-Language": "en-US"}, + + # Proxy setup + proxy="http://proxy.example.com:8080" + ) as crawler: + result = await crawler.arun( + url=url, + # Content handling + process_iframes=True, + screenshot=True, + + # Timing + page_timeout=60000, + delay_before_return_html=2.0, + + # Anti-detection + magic=True, + simulate_user=True, + + # Dynamic content + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more')?.click();" + ], + wait_for="css:.dynamic-content" + ) + + return { + "content": result.markdown, + "screenshot": result.screenshot, + "success": result.success + } +``` \ No newline at end of file diff --git a/docs/md_v2/basic/cache-modes.md b/docs/md_v2/basic/cache-modes.md new file mode 100644 index 0000000..04a4f21 --- /dev/null +++ b/docs/md_v2/basic/cache-modes.md @@ -0,0 +1,79 @@ +# Crawl4AI Cache System and Migration Guide + +## Overview +Starting from version X.X.X, Crawl4AI introduces a new caching system that replaces the old boolean flags with a more intuitive `CacheMode` enum. This change simplifies cache control and makes the behavior more predictable. + +## Old vs New Approach + +### Old Way (Deprecated) +The old system used multiple boolean flags: +- `bypass_cache`: Skip cache entirely +- `disable_cache`: Disable all caching +- `no_cache_read`: Don't read from cache +- `no_cache_write`: Don't write to cache + +### New Way (Recommended) +The new system uses a single `CacheMode` enum: +- `CacheMode.ENABLED`: Normal caching (read/write) +- `CacheMode.DISABLED`: No caching at all +- `CacheMode.READ_ONLY`: Only read from cache +- `CacheMode.WRITE_ONLY`: Only write to cache +- `CacheMode.BYPASS`: Skip cache for this operation + +## Migration Example + +### Old Code (Deprecated) +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def use_proxy(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun( + url="https://www.nbcnews.com/business", + bypass_cache=True # Old way + ) + print(len(result.markdown)) + +async def main(): + await use_proxy() + +if __name__ == "__main__": + asyncio.run(main()) +``` + +### New Code (Recommended) +```python +import asyncio +from crawl4ai import AsyncWebCrawler, CacheMode # Import CacheMode + +async def use_proxy(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun( + url="https://www.nbcnews.com/business", + cache_mode=CacheMode.BYPASS # New way + ) + print(len(result.markdown)) + +async def main(): + await use_proxy() + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Common Migration Patterns + +Old Flag | New Mode +---------|---------- +`bypass_cache=True` | `cache_mode=CacheMode.BYPASS` +`disable_cache=True` | `cache_mode=CacheMode.DISABLED` +`no_cache_read=True` | `cache_mode=CacheMode.WRITE_ONLY` +`no_cache_write=True` | `cache_mode=CacheMode.READ_ONLY` + +## 
Suppressing Deprecation Warnings +If you need time to migrate, you can temporarily suppress deprecation warnings: +```python +# In your config.py +SHOW_DEPRECATION_WARNINGS = False +``` diff --git a/docs/md_v2/basic/content-selection.md b/docs/md_v2/basic/content-selection.md new file mode 100644 index 0000000..f5f7397 --- /dev/null +++ b/docs/md_v2/basic/content-selection.md @@ -0,0 +1,199 @@ +# Content Selection + +Crawl4AI provides multiple ways to select and filter specific content from webpages. Learn how to precisely target the content you need. + +## CSS Selectors + +The simplest way to extract specific content: + +```python +# Extract specific content using CSS selector +result = await crawler.arun( + url="https://example.com", + css_selector=".main-article" # Target main article content +) + +# Multiple selectors +result = await crawler.arun( + url="https://example.com", + css_selector="article h1, article .content" # Target heading and content +) +``` + +## Content Filtering + +Control what content is included or excluded: + +```python +result = await crawler.arun( + url="https://example.com", + # Content thresholds + word_count_threshold=10, # Minimum words per block + + # Tag exclusions + excluded_tags=['form', 'header', 'footer', 'nav'], + + # Link filtering + exclude_external_links=True, # Remove external links + exclude_social_media_links=True, # Remove social media links + + # Media filtering + exclude_external_images=True # Remove external images +) +``` + +## Iframe Content + +Process content inside iframes: + +```python +result = await crawler.arun( + url="https://example.com", + process_iframes=True, # Extract iframe content + remove_overlay_elements=True # Remove popups/modals that might block iframes +) +``` + +## Structured Content Selection + +### Using LLMs for Smart Selection + +Use LLMs to intelligently extract specific types of content: + +```python +from pydantic import BaseModel +from crawl4ai.extraction_strategy import LLMExtractionStrategy + +class ArticleContent(BaseModel): + title: str + main_points: List[str] + conclusion: str + +strategy = LLMExtractionStrategy( + provider="ollama/nemotron", # Works with any supported LLM + schema=ArticleContent.schema(), + instruction="Extract the main article title, key points, and conclusion" +) + +result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy +) +article = json.loads(result.extracted_content) +``` + +### Pattern-Based Selection + +For repeated content patterns (like product listings, news feeds): + +```python +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy + +schema = { + "name": "News Articles", + "baseSelector": "article.news-item", # Repeated element + "fields": [ + {"name": "headline", "selector": "h2", "type": "text"}, + {"name": "summary", "selector": ".summary", "type": "text"}, + {"name": "category", "selector": ".category", "type": "text"}, + { + "name": "metadata", + "type": "nested", + "fields": [ + {"name": "author", "selector": ".author", "type": "text"}, + {"name": "date", "selector": ".date", "type": "text"} + ] + } + ] +} + +strategy = JsonCssExtractionStrategy(schema) +result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy +) +articles = json.loads(result.extracted_content) +``` + +## Domain-Based Filtering + +Control content based on domains: + +```python +result = await crawler.arun( + url="https://example.com", + exclude_domains=["ads.com", "tracker.com"], + 
exclude_social_media_domains=["facebook.com", "twitter.com"], # Custom social media domains to exclude + exclude_social_media_links=True +) +``` + +## Media Selection + +Select specific types of media: + +```python +result = await crawler.arun(url="https://example.com") + +# Access different media types +images = result.media["images"] # List of image details +videos = result.media["videos"] # List of video details +audios = result.media["audios"] # List of audio details + +# Image with metadata +for image in images: + print(f"URL: {image['src']}") + print(f"Alt text: {image['alt']}") + print(f"Description: {image['desc']}") + print(f"Relevance score: {image['score']}") +``` + +## Comprehensive Example + +Here's how to combine different selection methods: + +```python +async def extract_article_content(url: str): + # Define structured extraction + article_schema = { + "name": "Article", + "baseSelector": "article.main", + "fields": [ + {"name": "title", "selector": "h1", "type": "text"}, + {"name": "content", "selector": ".content", "type": "text"} + ] + } + + # Define LLM extraction + class ArticleAnalysis(BaseModel): + key_points: List[str] + sentiment: str + category: str + + async with AsyncWebCrawler() as crawler: + # Get structured content + pattern_result = await crawler.arun( + url=url, + extraction_strategy=JsonCssExtractionStrategy(article_schema), + word_count_threshold=10, + excluded_tags=['nav', 'footer'], + exclude_external_links=True + ) + + # Get semantic analysis + analysis_result = await crawler.arun( + url=url, + extraction_strategy=LLMExtractionStrategy( + provider="ollama/nemotron", + schema=ArticleAnalysis.schema(), + instruction="Analyze the article content" + ) + ) + + # Combine results + return { + "article": json.loads(pattern_result.extracted_content), + "analysis": json.loads(analysis_result.extracted_content), + "media": pattern_result.media + } +``` \ No newline at end of file diff --git a/docs/md_v2/basic/content_filtering.md b/docs/md_v2/basic/content_filtering.md new file mode 100644 index 0000000..9506c07 --- /dev/null +++ b/docs/md_v2/basic/content_filtering.md @@ -0,0 +1,84 @@ +# Content Filtering in Crawl4AI + +This guide explains how to use content filtering strategies in Crawl4AI to extract the most relevant information from crawled web pages. You'll learn how to use the built-in `BM25ContentFilter` and how to create your own custom content filtering strategies. + +## Relevance Content Filter + +The `RelevanceContentFilter` is an abstract class that provides a common interface for content filtering strategies. Specific filtering algorithms, like `BM25ContentFilter`, inherit from this class and implement the `filter_content` method. This method takes the HTML content as input and returns a list of filtered text blocks. + +## BM25 Algorithm + +The `BM25ContentFilter` uses the BM25 algorithm, a ranking function used in information retrieval to estimate the relevance of documents to a given search query. In Crawl4AI, this algorithm helps to identify and extract text chunks that are most relevant to the page's metadata or a user-specified query. + +### Usage + +To use the `BM25ContentFilter`, initialize it and then pass it as the `extraction_strategy` parameter to the `arun` method of the crawler. 
+ +```python +from crawl4ai import AsyncWebCrawler +from crawl4ai.content_filter_strategy import BM25ContentFilter + +async def filter_content(url, query=None): + async with AsyncWebCrawler() as crawler: + content_filter = BM25ContentFilter(user_query=query) + result = await crawler.arun(url=url, content_filter=content_filter, fit_markdown=True) # Set fit_markdown flag to True to trigger BM25 filtering + if result.success: + print(f"Filtered Content (JSON):\n{result.extracted_content}") + print(f"\nFiltered Markdown:\n{result.fit_markdown}") # New field in CrawlResult object + print(f"\nFiltered HTML:\n{result.fit_html}") # New field in CrawlResult object. Note that raw HTML may have tags re-organized due to internal parsing. + else: + print("Error:", result.error_message) + +# Example usage: +asyncio.run(filter_content("https://en.wikipedia.org/wiki/Apple", "fruit nutrition health")) # with query +asyncio.run(filter_content("https://en.wikipedia.org/wiki/Apple")) # without query, metadata will be used as the query. + +``` + +### Parameters + +- **`user_query`**: (Optional) A string representing the search query. If not provided, the filter extracts relevant metadata (title, description, keywords) from the page and uses that as the query. +- **`bm25_threshold`**: (Optional, default 1.0) A float value that controls the threshold for relevance. Higher values result in stricter filtering, returning only the most relevant text chunks. Lower values result in more lenient filtering. + + +## Fit Markdown Flag + +Setting the `fit_markdown` flag to `True` in the `arun` method activates the BM25 content filtering during the crawl. The `fit_markdown` parameter instructs the scraper to extract and clean the HTML, primarily to prepare for a Large Language Model that cannot process large amounts of data. Setting this flag not only improves the quality of the extracted content but also adds the filtered content to two new attributes in the returned `CrawlResult` object: `fit_markdown` and `fit_html`. + + +## Custom Content Filtering Strategies + +You can create your own custom filtering strategies by inheriting from the `RelevantContentFilter` class and implementing the `filter_content` method. This allows you to tailor the filtering logic to your specific needs. + +```python +from crawl4ai.content_filter_strategy import RelevantContentFilter +from bs4 import BeautifulSoup, Tag +from typing import List + +class MyCustomFilter(RelevantContentFilter): + def filter_content(self, html: str) -> List[str]: + soup = BeautifulSoup(html, 'lxml') + # Implement custom filtering logic here + # Example: extract all paragraphs within divs with class "article-body" + filtered_paragraphs = [] + for tag in soup.select("div.article-body p"): + if isinstance(tag, Tag): + filtered_paragraphs.append(str(tag)) # Add the cleaned HTML element. + return filtered_paragraphs + + + +async def custom_filter_demo(url: str): + async with AsyncWebCrawler() as crawler: + custom_filter = MyCustomFilter() + result = await crawler.arun(url, content_filter=custom_filter) + if result.success: + print(result.extracted_content) + +``` + +This example demonstrates extracting paragraphs from a specific div class. You can customize this logic to implement different filtering strategies, use regular expressions, analyze text density, or apply other relevant techniques. + +## Conclusion + +Content filtering strategies provide a powerful way to refine the output of your crawls. 
By using `BM25ContentFilter` or creating custom strategies, you can focus on the most pertinent information and improve the efficiency of your data processing pipeline. diff --git a/docs/md_v2/basic/docker-deploymeny.md b/docs/md_v2/basic/docker-deploymeny.md new file mode 100644 index 0000000..87e468a --- /dev/null +++ b/docs/md_v2/basic/docker-deploymeny.md @@ -0,0 +1,718 @@ +# Docker Deployment + +Crawl4AI provides official Docker images for easy deployment and scalability. This guide covers installation, configuration, and usage of Crawl4AI in Docker environments. + +## Quick Start 🚀 + +Pull and run the basic version: + +```bash +# Basic run without security +docker pull unclecode/crawl4ai:basic +docker run -p 11235:11235 unclecode/crawl4ai:basic + +# Run with API security enabled +docker run -p 11235:11235 -e CRAWL4AI_API_TOKEN=your_secret_token unclecode/crawl4ai:basic +``` + +## Running with Docker Compose 🐳 + +### Use Docker Compose (From Local Dockerfile or Docker Hub) + +Crawl4AI provides flexibility to use Docker Compose for managing your containerized services. You can either build the image locally from the provided `Dockerfile` or use the pre-built image from Docker Hub. + +### **Option 1: Using Docker Compose to Build Locally** +If you want to build the image locally, use the provided `docker-compose.local.yml` file. + +```bash +docker-compose -f docker-compose.local.yml up -d +``` + +This will: +1. Build the Docker image from the provided `Dockerfile`. +2. Start the container and expose it on `http://localhost:11235`. + +--- + +### **Option 2: Using Docker Compose with Pre-Built Image from Hub** +If you prefer using the pre-built image on Docker Hub, use the `docker-compose.hub.yml` file. + +```bash +docker-compose -f docker-compose.hub.yml up -d +``` + +This will: +1. Pull the pre-built image `unclecode/crawl4ai:basic` (or `all`, depending on your configuration). +2. Start the container and expose it on `http://localhost:11235`. + +--- + +### **Stopping the Running Services** + +To stop the services started via Docker Compose, you can use: + +```bash +docker-compose -f docker-compose.local.yml down +# OR +docker-compose -f docker-compose.hub.yml down +``` + +If the containers don’t stop and the application is still running, check the running containers: + +```bash +docker ps +``` + +Find the `CONTAINER ID` of the running service and stop it forcefully: + +```bash +docker stop +``` + +--- + +### **Debugging with Docker Compose** + +- **Check Logs**: To view the container logs: + ```bash + docker-compose -f docker-compose.local.yml logs -f + ``` + +- **Remove Orphaned Containers**: If the service is still running unexpectedly: + ```bash + docker-compose -f docker-compose.local.yml down --remove-orphans + ``` + +- **Manually Remove Network**: If the network is still in use: + ```bash + docker network ls + docker network rm crawl4ai_default + ``` + +--- + +### Why Use Docker Compose? + +Docker Compose is the recommended way to deploy Crawl4AI because: +1. It simplifies multi-container setups. +2. Allows you to define environment variables, resources, and ports in a single file. +3. Makes it easier to switch between local development and production-ready images. + +For example, your `docker-compose.yml` could include API keys, token settings, and memory limits, making deployment quick and consistent. 
+ + + + +## API Security 🔒 + +### Understanding CRAWL4AI_API_TOKEN + +The `CRAWL4AI_API_TOKEN` provides optional security for your Crawl4AI instance: + +- If `CRAWL4AI_API_TOKEN` is set: All API endpoints (except `/health`) require authentication +- If `CRAWL4AI_API_TOKEN` is not set: The API is publicly accessible + +```bash +# Secured Instance +docker run -p 11235:11235 -e CRAWL4AI_API_TOKEN=your_secret_token unclecode/crawl4ai:all + +# Unsecured Instance +docker run -p 11235:11235 unclecode/crawl4ai:all +``` + +### Making API Calls + +For secured instances, include the token in all requests: + +```python +import requests + +# Setup headers if token is being used +api_token = "your_secret_token" # Same token set in CRAWL4AI_API_TOKEN +headers = {"Authorization": f"Bearer {api_token}"} if api_token else {} + +# Making authenticated requests +response = requests.post( + "http://localhost:11235/crawl", + headers=headers, + json={ + "urls": "https://example.com", + "priority": 10 + } +) + +# Checking task status +task_id = response.json()["task_id"] +status = requests.get( + f"http://localhost:11235/task/{task_id}", + headers=headers +) +``` + +### Using with Docker Compose + +In your `docker-compose.yml`: +```yaml +services: + crawl4ai: + image: unclecode/crawl4ai:all + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} # Optional + # ... other configuration +``` + +Then either: +1. Set in `.env` file: +```env +CRAWL4AI_API_TOKEN=your_secret_token +``` + +2. Or set via command line: +```bash +CRAWL4AI_API_TOKEN=your_secret_token docker-compose up +``` + +> **Security Note**: If you enable the API token, make sure to keep it secure and never commit it to version control. The token will be required for all API endpoints except the health check endpoint (`/health`). + +## Configuration Options 🔧 + +### Environment Variables + +You can configure the service using environment variables: + +```bash +# Basic configuration +docker run -p 11235:11235 \ + -e MAX_CONCURRENT_TASKS=5 \ + unclecode/crawl4ai:all + +# With security and LLM support +docker run -p 11235:11235 \ + -e CRAWL4AI_API_TOKEN=your_secret_token \ + -e OPENAI_API_KEY=sk-... \ + -e ANTHROPIC_API_KEY=sk-ant-... \ + unclecode/crawl4ai:all +``` + +### Using Docker Compose (Recommended) 🐳 + +Create a `docker-compose.yml`: + +```yaml +version: '3.8' + +services: + crawl4ai: + image: unclecode/crawl4ai:all + ports: + - "11235:11235" + environment: + - CRAWL4AI_API_TOKEN=${CRAWL4AI_API_TOKEN:-} # Optional API security + - MAX_CONCURRENT_TASKS=5 + # LLM Provider Keys + - OPENAI_API_KEY=${OPENAI_API_KEY:-} + - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-} + volumes: + - /dev/shm:/dev/shm + deploy: + resources: + limits: + memory: 4G + reservations: + memory: 1G +``` + +You can run it in two ways: + +1. Using environment variables directly: +```bash +CRAWL4AI_API_TOKEN=secret123 OPENAI_API_KEY=sk-... docker-compose up +``` + +2. Using a `.env` file (recommended): +Create a `.env` file in the same directory: +```env +# API Security (optional) +CRAWL4AI_API_TOKEN=your_secret_token + +# LLM Provider Keys +OPENAI_API_KEY=sk-... +ANTHROPIC_API_KEY=sk-ant-... 
+ +# Other Configuration +MAX_CONCURRENT_TASKS=5 +``` + +Then simply run: +```bash +docker-compose up +``` + +### Testing the Deployment 🧪 + +```python +import requests + +# For unsecured instances +def test_unsecured(): + # Health check + health = requests.get("http://localhost:11235/health") + print("Health check:", health.json()) + + # Basic crawl + response = requests.post( + "http://localhost:11235/crawl", + json={ + "urls": "https://www.nbcnews.com/business", + "priority": 10 + } + ) + task_id = response.json()["task_id"] + print("Task ID:", task_id) + +# For secured instances +def test_secured(api_token): + headers = {"Authorization": f"Bearer {api_token}"} + + # Basic crawl with authentication + response = requests.post( + "http://localhost:11235/crawl", + headers=headers, + json={ + "urls": "https://www.nbcnews.com/business", + "priority": 10 + } + ) + task_id = response.json()["task_id"] + print("Task ID:", task_id) +``` + +### LLM Extraction Example 🤖 + +When you've configured your LLM provider keys (via environment variables or `.env`), you can use LLM extraction: + +```python +request = { + "urls": "https://example.com", + "extraction_config": { + "type": "llm", + "params": { + "provider": "openai/gpt-4", + "instruction": "Extract main topics from the page" + } + } +} + +# Make the request (add headers if using API security) +response = requests.post("http://localhost:11235/crawl", json=request) +``` + +> **Note**: Remember to add `.env` to your `.gitignore` to keep your API keys secure! + + + + + + + + + + + + + + + + + + +## Usage Examples 📝 + +### Basic Crawling + +```python +request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10 +} + +response = requests.post("http://localhost:11235/crawl", json=request) +task_id = response.json()["task_id"] + +# Get results +result = requests.get(f"http://localhost:11235/task/{task_id}") +``` + +### Structured Data Extraction + +```python +schema = { + "name": "Crypto Prices", + "baseSelector": ".cds-tableRow-t45thuk", + "fields": [ + { + "name": "crypto", + "selector": "td:nth-child(1) h2", + "type": "text", + }, + { + "name": "price", + "selector": "td:nth-child(2)", + "type": "text", + } + ], +} + +request = { + "urls": "https://www.coinbase.com/explore", + "extraction_config": { + "type": "json_css", + "params": {"schema": schema} + } +} +``` + +### Dynamic Content Handling + +```python +request = { + "urls": "https://www.nbcnews.com/business", + "js_code": [ + "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();" + ], + "wait_for": "article.tease-card:nth-child(10)" +} +``` + +### AI-Powered Extraction (Full Version) + +```python +request = { + "urls": "https://www.nbcnews.com/business", + "extraction_config": { + "type": "cosine", + "params": { + "semantic_filter": "business finance economy", + "word_count_threshold": 10, + "max_dist": 0.2, + "top_k": 3 + } + } +} +``` + +## Platform-Specific Instructions 💻 + +### macOS +```bash +docker pull unclecode/crawl4ai:basic +docker run -p 11235:11235 unclecode/crawl4ai:basic +``` + +### Ubuntu +```bash +# Basic version +docker pull unclecode/crawl4ai:basic +docker run -p 11235:11235 unclecode/crawl4ai:basic + +# With GPU support +docker pull unclecode/crawl4ai:gpu +docker run --gpus all -p 11235:11235 unclecode/crawl4ai:gpu +``` + +### Windows (PowerShell) +```powershell +docker pull unclecode/crawl4ai:basic +docker run -p 11235:11235 
unclecode/crawl4ai:basic +``` + +## Testing 🧪 + +Save this as `test_docker.py`: + +```python +import requests +import json +import time +import sys + +class Crawl4AiTester: + def __init__(self, base_url: str = "http://localhost:11235"): + self.base_url = base_url + + def submit_and_wait(self, request_data: dict, timeout: int = 300) -> dict: + # Submit crawl job + response = requests.post(f"{self.base_url}/crawl", json=request_data) + task_id = response.json()["task_id"] + print(f"Task ID: {task_id}") + + # Poll for result + start_time = time.time() + while True: + if time.time() - start_time > timeout: + raise TimeoutError(f"Task {task_id} timeout") + + result = requests.get(f"{self.base_url}/task/{task_id}") + status = result.json() + + if status["status"] == "completed": + return status + + time.sleep(2) + +def test_deployment(): + tester = Crawl4AiTester() + + # Test basic crawl + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10 + } + + result = tester.submit_and_wait(request) + print("Basic crawl successful!") + print(f"Content length: {len(result['result']['markdown'])}") + +if __name__ == "__main__": + test_deployment() +``` + +## Advanced Configuration ⚙️ + +### Crawler Parameters + +The `crawler_params` field allows you to configure the browser instance and crawling behavior. Here are key parameters you can use: + +```python +request = { + "urls": "https://example.com", + "crawler_params": { + # Browser Configuration + "headless": True, # Run in headless mode + "browser_type": "chromium", # chromium/firefox/webkit + "user_agent": "custom-agent", # Custom user agent + "proxy": "http://proxy:8080", # Proxy configuration + + # Performance & Behavior + "page_timeout": 30000, # Page load timeout (ms) + "verbose": True, # Enable detailed logging + "semaphore_count": 5, # Concurrent request limit + + # Anti-Detection Features + "simulate_user": True, # Simulate human behavior + "magic": True, # Advanced anti-detection + "override_navigator": True, # Override navigator properties + + # Session Management + "user_data_dir": "./browser-data", # Browser profile location + "use_managed_browser": True, # Use persistent browser + } +} +``` + +### Extra Parameters + +The `extra` field allows passing additional parameters directly to the crawler's `arun` function: + +```python +request = { + "urls": "https://example.com", + "extra": { + "word_count_threshold": 10, # Min words per block + "only_text": True, # Extract only text + "bypass_cache": True, # Force fresh crawl + "process_iframes": True, # Include iframe content + } +} +``` + +### Complete Examples + +1. **Advanced News Crawling** +```python +request = { + "urls": "https://www.nbcnews.com/business", + "crawler_params": { + "headless": True, + "page_timeout": 30000, + "remove_overlay_elements": True # Remove popups + }, + "extra": { + "word_count_threshold": 50, # Longer content blocks + "bypass_cache": True # Fresh content + }, + "css_selector": ".article-body" +} +``` + +2. **Anti-Detection Configuration** +```python +request = { + "urls": "https://example.com", + "crawler_params": { + "simulate_user": True, + "magic": True, + "override_navigator": True, + "user_agent": "Mozilla/5.0 ...", + "headers": { + "Accept-Language": "en-US,en;q=0.9" + } + } +} +``` + +3. 
**LLM Extraction with Custom Parameters** +```python +request = { + "urls": "https://openai.com/pricing", + "extraction_config": { + "type": "llm", + "params": { + "provider": "openai/gpt-4", + "schema": pricing_schema + } + }, + "crawler_params": { + "verbose": True, + "page_timeout": 60000 + }, + "extra": { + "word_count_threshold": 1, + "only_text": True + } +} +``` + +4. **Session-Based Dynamic Content** +```python +request = { + "urls": "https://example.com", + "crawler_params": { + "session_id": "dynamic_session", + "headless": False, + "page_timeout": 60000 + }, + "js_code": ["window.scrollTo(0, document.body.scrollHeight);"], + "wait_for": "js:() => document.querySelectorAll('.item').length > 10", + "extra": { + "delay_before_return_html": 2.0 + } +} +``` + +5. **Screenshot with Custom Timing** +```python +request = { + "urls": "https://example.com", + "screenshot": True, + "crawler_params": { + "headless": True, + "screenshot_wait_for": ".main-content" + }, + "extra": { + "delay_before_return_html": 3.0 + } +} +``` + +### Parameter Reference Table + +| Category | Parameter | Type | Description | +|----------|-----------|------|-------------| +| Browser | headless | bool | Run browser in headless mode | +| Browser | browser_type | str | Browser engine selection | +| Browser | user_agent | str | Custom user agent string | +| Network | proxy | str | Proxy server URL | +| Network | headers | dict | Custom HTTP headers | +| Timing | page_timeout | int | Page load timeout (ms) | +| Timing | delay_before_return_html | float | Wait before capture | +| Anti-Detection | simulate_user | bool | Human behavior simulation | +| Anti-Detection | magic | bool | Advanced protection | +| Session | session_id | str | Browser session ID | +| Session | user_data_dir | str | Profile directory | +| Content | word_count_threshold | int | Minimum words per block | +| Content | only_text | bool | Text-only extraction | +| Content | process_iframes | bool | Include iframe content | +| Debug | verbose | bool | Detailed logging | +| Debug | log_console | bool | Browser console logs | + +## Troubleshooting 🔍 + +### Common Issues + +1. **Connection Refused** + ``` + Error: Connection refused at localhost:11235 + ``` + Solution: Ensure the container is running and ports are properly mapped. + +2. **Resource Limits** + ``` + Error: No available slots + ``` + Solution: Increase MAX_CONCURRENT_TASKS or container resources. + +3. **GPU Access** + ``` + Error: GPU not found + ``` + Solution: Ensure proper NVIDIA drivers and use `--gpus all` flag. + +### Debug Mode + +Access container for debugging: +```bash +docker run -it --entrypoint /bin/bash unclecode/crawl4ai:all +``` + +View container logs: +```bash +docker logs [container_id] +``` + +## Best Practices 🌟 + +1. **Resource Management** + - Set appropriate memory and CPU limits + - Monitor resource usage via health endpoint + - Use basic version for simple crawling tasks + +2. **Scaling** + - Use multiple containers for high load + - Implement proper load balancing + - Monitor performance metrics + +3. 
**Security** + - Use environment variables for sensitive data + - Implement proper network isolation + - Regular security updates + +## API Reference 📚 + +### Health Check +```http +GET /health +``` + +### Submit Crawl Task +```http +POST /crawl +Content-Type: application/json + +{ + "urls": "string or array", + "extraction_config": { + "type": "basic|llm|cosine|json_css", + "params": {} + }, + "priority": 1-10, + "ttl": 3600 +} +``` + +### Get Task Status +```http +GET /task/{task_id} +``` + +For more details, visit the [official documentation](https://crawl4ai.com/mkdocs/). \ No newline at end of file diff --git a/docs/md_v2/basic/file-download.md b/docs/md_v2/basic/file-download.md new file mode 100644 index 0000000..c37e881 --- /dev/null +++ b/docs/md_v2/basic/file-download.md @@ -0,0 +1,148 @@ +# Download Handling in Crawl4AI + +This guide explains how to use Crawl4AI to handle file downloads during crawling. You'll learn how to trigger downloads, specify download locations, and access downloaded files. + +## Enabling Downloads + +By default, Crawl4AI does not download files. To enable downloads, set the `accept_downloads` parameter to `True` in either the `AsyncWebCrawler` constructor or the `arun` method. + +```python +from crawl4ai import AsyncWebCrawler + +async def main(): + async with AsyncWebCrawler(accept_downloads=True) as crawler: # Globally enable downloads + # ... your crawling logic ... + +asyncio.run(main()) +``` + +Or, enable it for a specific crawl: + +```python +async def main(): + async with AsyncWebCrawler() as crawler: + result = await crawler.arun(url="...", accept_downloads=True) + # ... +``` + +## Specifying Download Location + +You can specify the download directory using the `downloads_path` parameter. If not provided, Crawl4AI creates a "downloads" directory inside the `.crawl4ai` folder in your home directory. + +```python +import os +from pathlib import Path + +# ... inside your crawl function: + +downloads_path = os.path.join(os.getcwd(), "my_downloads") # Custom download path +os.makedirs(downloads_path, exist_ok=True) + +result = await crawler.arun(url="...", downloads_path=downloads_path, accept_downloads=True) + +# ... +``` + +If you are setting it globally, provide the path to the AsyncWebCrawler: +```python +async def crawl_with_downloads(url: str, download_path: str): + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=download_path, # or set it on arun + verbose=True + ) as crawler: + result = await crawler.arun(url=url) # you still need to enable downloads per call. + # ... +``` + + + +## Triggering Downloads + +Downloads are typically triggered by user interactions on a web page (e.g., clicking a download button). You can simulate these actions with the `js_code` parameter, injecting JavaScript code to be executed within the browser context. The `wait_for` parameter might also be crucial to allowing sufficient time for downloads to initiate before the crawler proceeds. + +```python +result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + // Find and click the first Windows installer link + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) { + downloadLink.click(); + } + """, + wait_for=5 # Wait for 5 seconds for the download to start +) +``` + +## Accessing Downloaded Files + +Downloaded file paths are stored in the `downloaded_files` attribute of the returned `CrawlResult` object. 
This is a list of strings, with each string representing the absolute path to a downloaded file. + +```python +if result.downloaded_files: + print("Downloaded files:") + for file_path in result.downloaded_files: + print(f"- {file_path}") + # Perform operations with downloaded files, e.g., check file size + file_size = os.path.getsize(file_path) + print(f"- File size: {file_size} bytes") +else: + print("No files downloaded.") +``` + + +## Example: Downloading Multiple Files + +```python +import asyncio +import os +from pathlib import Path +from crawl4ai import AsyncWebCrawler + +async def download_multiple_files(url: str, download_path: str): + + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=download_path, + verbose=True + ) as crawler: + result = await crawler.arun( + url=url, + js_code=""" + // Trigger multiple downloads (example) + const downloadLinks = document.querySelectorAll('a[download]'); // Or a more specific selector + for (const link of downloadLinks) { + link.click(); + await new Promise(r => setTimeout(r, 2000)); // Add a small delay between clicks if needed + } + """, + wait_for=10 # Adjust the timeout to match the expected time for all downloads to start + ) + + if result.downloaded_files: + print("Downloaded files:") + for file in result.downloaded_files: + print(f"- {file}") + else: + print("No files downloaded.") + + +# Example usage +download_path = os.path.join(Path.home(), ".crawl4ai", "downloads") +os.makedirs(download_path, exist_ok=True) # Create directory if it doesn't exist + + +asyncio.run(download_multiple_files("https://www.python.org/downloads/windows/", download_path)) +``` + +## Important Considerations + +- **Browser Context:** Downloads are managed within the browser context. Ensure your `js_code` correctly targets the download triggers on the specific web page. +- **Waiting:** Use `wait_for` to manage the timing of the crawl process if immediate download might not occur. +- **Error Handling:** Implement proper error handling to gracefully manage failed downloads or incorrect file paths. +- **Security:** Downloaded files should be scanned for potential security threats before use. + + + +This guide provides a foundation for handling downloads with Crawl4AI. You can adapt these techniques to manage downloads in various scenarios and integrate them into more complex crawling workflows. diff --git a/docs/md/installation.md b/docs/md_v2/basic/installation.md similarity index 73% rename from docs/md/installation.md rename to docs/md_v2/basic/installation.md index a4a6085..de8aeaf 100644 --- a/docs/md/installation.md +++ b/docs/md_v2/basic/installation.md @@ -58,6 +58,51 @@ crawl4ai-download-models This is optional but will boost the performance and speed of the crawler. You only need to do this once after installation. 
+## Playwright Installation Note for Ubuntu + +If you encounter issues with Playwright installation on Ubuntu, you may need to install additional dependencies: + +```bash +sudo apt-get install -y \ + libwoff1 \ + libopus0 \ + libwebp7 \ + libwebpdemux2 \ + libenchant-2-2 \ + libgudev-1.0-0 \ + libsecret-1-0 \ + libhyphen0 \ + libgdk-pixbuf2.0-0 \ + libegl1 \ + libnotify4 \ + libxslt1.1 \ + libevent-2.1-7 \ + libgles2 \ + libxcomposite1 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libepoxy0 \ + libgtk-3-0 \ + libharfbuzz-icu0 \ + libgstreamer-gl1.0-0 \ + libgstreamer-plugins-bad1.0-0 \ + gstreamer1.0-plugins-good \ + gstreamer1.0-plugins-bad \ + libxt6 \ + libxaw7 \ + xvfb \ + fonts-noto-color-emoji \ + libfontconfig \ + libfreetype6 \ + xfonts-cyrillic \ + xfonts-scalable \ + fonts-liberation \ + fonts-ipafont-gothic \ + fonts-wqy-zenhei \ + fonts-tlwg-loma-otf \ + fonts-freefont-ttf +``` + ## Option 2: Using Docker (Coming Soon) Docker support for Crawl4AI is currently in progress and will be available soon. This will allow you to run Crawl4AI in a containerized environment, ensuring consistency across different systems. diff --git a/docs/md_v2/basic/output-formats.md b/docs/md_v2/basic/output-formats.md new file mode 100644 index 0000000..0d25e88 --- /dev/null +++ b/docs/md_v2/basic/output-formats.md @@ -0,0 +1,195 @@ +# Output Formats + +Crawl4AI provides multiple output formats to suit different needs, from raw HTML to structured data using LLM or pattern-based extraction. + +## Basic Formats + +```python +result = await crawler.arun(url="https://example.com") + +# Access different formats +raw_html = result.html # Original HTML +clean_html = result.cleaned_html # Sanitized HTML +markdown = result.markdown # Standard markdown +fit_md = result.fit_markdown # Most relevant content in markdown +``` + +## Raw HTML + +Original, unmodified HTML from the webpage. Useful when you need to: +- Preserve the exact page structure +- Process HTML with your own tools +- Debug page issues + +```python +result = await crawler.arun(url="https://example.com") +print(result.html) # Complete HTML including headers, scripts, etc. +``` + +## Cleaned HTML + +Sanitized HTML with unnecessary elements removed. Automatically: +- Removes scripts and styles +- Cleans up formatting +- Preserves semantic structure + +```python +result = await crawler.arun( + url="https://example.com", + excluded_tags=['form', 'header', 'footer'], # Additional tags to remove + keep_data_attributes=False # Remove data-* attributes +) +print(result.cleaned_html) +``` + +## Standard Markdown + +HTML converted to clean markdown format. Great for: +- Content analysis +- Documentation +- Readability + +```python +result = await crawler.arun( + url="https://example.com", + include_links_on_markdown=True # Include links in markdown +) +print(result.markdown) +``` + +## Fit Markdown + +Most relevant content extracted and converted to markdown. Ideal for: +- Article extraction +- Main content focus +- Removing boilerplate + +```python +result = await crawler.arun(url="https://example.com") +print(result.fit_markdown) # Only the main content +``` + +## Structured Data Extraction + +Crawl4AI offers two powerful approaches for structured data extraction: + +### 1. LLM-Based Extraction + +Use any LLM (OpenAI, HuggingFace, Ollama, etc.) 
to extract structured data with high accuracy: + +```python +from pydantic import BaseModel +from crawl4ai.extraction_strategy import LLMExtractionStrategy + +class KnowledgeGraph(BaseModel): + entities: List[dict] + relationships: List[dict] + +strategy = LLMExtractionStrategy( + provider="ollama/nemotron", # or "huggingface/...", "ollama/..." + api_token="your-token", # not needed for Ollama + schema=KnowledgeGraph.schema(), + instruction="Extract entities and relationships from the content" +) + +result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy +) +knowledge_graph = json.loads(result.extracted_content) +``` + +### 2. Pattern-Based Extraction + +For pages with repetitive patterns (e.g., product listings, article feeds), use JsonCssExtractionStrategy: + +```python +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy + +schema = { + "name": "Product Listing", + "baseSelector": ".product-card", # Repeated element + "fields": [ + {"name": "title", "selector": "h2", "type": "text"}, + {"name": "price", "selector": ".price", "type": "text"}, + {"name": "description", "selector": ".desc", "type": "text"} + ] +} + +strategy = JsonCssExtractionStrategy(schema) +result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy +) +products = json.loads(result.extracted_content) +``` + +## Content Customization + +### HTML to Text Options + +Configure markdown conversion: + +```python +result = await crawler.arun( + url="https://example.com", + html2text={ + "escape_dot": False, + "body_width": 0, + "protect_links": True, + "unicode_snob": True + } +) +``` + +### Content Filters + +Control what content is included: + +```python +result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Minimum words per block + exclude_external_links=True, # Remove external links + exclude_external_images=True, # Remove external images + excluded_tags=['form', 'nav'] # Remove specific HTML tags +) +``` + +## Comprehensive Example + +Here's how to use multiple output formats together: + +```python +async def crawl_content(url: str): + async with AsyncWebCrawler() as crawler: + # Extract main content with fit markdown + result = await crawler.arun( + url=url, + word_count_threshold=10, + exclude_external_links=True + ) + + # Get structured data using LLM + llm_result = await crawler.arun( + url=url, + extraction_strategy=LLMExtractionStrategy( + provider="ollama/nemotron", + schema=YourSchema.schema(), + instruction="Extract key information" + ) + ) + + # Get repeated patterns (if any) + pattern_result = await crawler.arun( + url=url, + extraction_strategy=JsonCssExtractionStrategy(your_schema) + ) + + return { + "main_content": result.fit_markdown, + "structured_data": json.loads(llm_result.extracted_content), + "pattern_data": json.loads(pattern_result.extracted_content), + "media": result.media + } +``` \ No newline at end of file diff --git a/docs/md_v2/basic/page-interaction.md b/docs/md_v2/basic/page-interaction.md new file mode 100644 index 0000000..7555f22 --- /dev/null +++ b/docs/md_v2/basic/page-interaction.md @@ -0,0 +1,207 @@ +# Page Interaction + +Crawl4AI provides powerful features for interacting with dynamic webpages, handling JavaScript execution, and managing page events. 
+ +## JavaScript Execution + +### Basic Execution + +```python +# Single JavaScript command +result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);" +) + +# Multiple commands +js_commands = [ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();", + "document.querySelector('#consent-button').click();" +] +result = await crawler.arun( + url="https://example.com", + js_code=js_commands +) +``` + +## Wait Conditions + +### CSS-Based Waiting + +Wait for elements to appear: + +```python +result = await crawler.arun( + url="https://example.com", + wait_for="css:.dynamic-content" # Wait for element with class 'dynamic-content' +) +``` + +### JavaScript-Based Waiting + +Wait for custom conditions: + +```python +# Wait for number of elements +wait_condition = """() => { + return document.querySelectorAll('.item').length > 10; +}""" + +result = await crawler.arun( + url="https://example.com", + wait_for=f"js:{wait_condition}" +) + +# Wait for dynamic content to load +wait_for_content = """() => { + const content = document.querySelector('.content'); + return content && content.innerText.length > 100; +}""" + +result = await crawler.arun( + url="https://example.com", + wait_for=f"js:{wait_for_content}" +) +``` + +## Handling Dynamic Content + +### Load More Content + +Handle infinite scroll or load more buttons: + +```python +# Scroll and wait pattern +result = await crawler.arun( + url="https://example.com", + js_code=[ + # Scroll to bottom + "window.scrollTo(0, document.body.scrollHeight);", + # Click load more if exists + "const loadMore = document.querySelector('.load-more'); if(loadMore) loadMore.click();" + ], + # Wait for new content + wait_for="js:() => document.querySelectorAll('.item').length > previousCount" +) +``` + +### Form Interaction + +Handle forms and inputs: + +```python +js_form_interaction = """ + // Fill form fields + document.querySelector('#search').value = 'search term'; + // Submit form + document.querySelector('form').submit(); +""" + +result = await crawler.arun( + url="https://example.com", + js_code=js_form_interaction, + wait_for="css:.results" # Wait for results to load +) +``` + +## Timing Control + +### Delays and Timeouts + +Control timing of interactions: + +```python +result = await crawler.arun( + url="https://example.com", + page_timeout=60000, # Page load timeout (ms) + delay_before_return_html=2.0, # Wait before capturing content +) +``` + +## Complex Interactions Example + +Here's an example of handling a dynamic page with multiple interactions: + +```python +async def crawl_dynamic_content(): + async with AsyncWebCrawler() as crawler: + # Initial page load + result = await crawler.arun( + url="https://example.com", + # Handle cookie consent + js_code="document.querySelector('.cookie-accept')?.click();", + wait_for="css:.main-content" + ) + + # Load more content + session_id = "dynamic_session" # Keep session for multiple interactions + + for page in range(3): # Load 3 pages of content + result = await crawler.arun( + url="https://example.com", + session_id=session_id, + js_code=[ + # Scroll to bottom + "window.scrollTo(0, document.body.scrollHeight);", + # Store current item count + "window.previousCount = document.querySelectorAll('.item').length;", + # Click load more + "document.querySelector('.load-more')?.click();" + ], + # Wait for new items + wait_for="""() => { + const currentCount = document.querySelectorAll('.item').length; + return 
currentCount > window.previousCount; + }""", + # Only execute JS without reloading page + js_only=True if page > 0 else False + ) + + # Process content after each load + print(f"Page {page + 1} items:", len(result.cleaned_html)) + + # Clean up session + await crawler.crawler_strategy.kill_session(session_id) +``` + +## Using with Extraction Strategies + +Combine page interaction with structured extraction: + +```python +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy, LLMExtractionStrategy + +# Pattern-based extraction after interaction +schema = { + "name": "Dynamic Items", + "baseSelector": ".item", + "fields": [ + {"name": "title", "selector": "h2", "type": "text"}, + {"name": "description", "selector": ".desc", "type": "text"} + ] +} + +result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.item:nth-child(10)", # Wait for 10 items + extraction_strategy=JsonCssExtractionStrategy(schema) +) + +# Or use LLM to analyze dynamic content +class ContentAnalysis(BaseModel): + topics: List[str] + summary: str + +result = await crawler.arun( + url="https://example.com", + js_code="document.querySelector('.show-more').click();", + wait_for="css:.full-content", + extraction_strategy=LLMExtractionStrategy( + provider="ollama/nemotron", + schema=ContentAnalysis.schema(), + instruction="Analyze the full content" + ) +) +``` \ No newline at end of file diff --git a/docs/md_v2/basic/prefix-based-input.md b/docs/md_v2/basic/prefix-based-input.md new file mode 100644 index 0000000..42987a6 --- /dev/null +++ b/docs/md_v2/basic/prefix-based-input.md @@ -0,0 +1,235 @@ +# Prefix-Based Input Handling in Crawl4AI + +This guide will walk you through using the Crawl4AI library to crawl web pages, local HTML files, and raw HTML strings. We'll demonstrate these capabilities using a Wikipedia page as an example. + +## Table of Contents +- [Prefix-Based Input Handling in Crawl4AI](#prefix-based-input-handling-in-crawl4ai) + - [Table of Contents](#table-of-contents) + - [Crawling a Web URL](#crawling-a-web-url) + - [Crawling a Local HTML File](#crawling-a-local-html-file) + - [Crawling Raw HTML Content](#crawling-raw-html-content) + - [Complete Example](#complete-example) + - [**How It Works**](#how-it-works) + - [**Running the Example**](#running-the-example) + - [Conclusion](#conclusion) + +--- + + +### Crawling a Web URL + +To crawl a live web page, provide the URL starting with `http://` or `https://`. + +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def crawl_web(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url="https://en.wikipedia.org/wiki/apple", bypass_cache=True) + if result.success: + print("Markdown Content:") + print(result.markdown) + else: + print(f"Failed to crawl: {result.error_message}") + +asyncio.run(crawl_web()) +``` + +### Crawling a Local HTML File + +To crawl a local HTML file, prefix the file path with `file://`. 
+ +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def crawl_local_file(): + local_file_path = "/path/to/apple.html" # Replace with your file path + file_url = f"file://{local_file_path}" + + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url=file_url, bypass_cache=True) + if result.success: + print("Markdown Content from Local File:") + print(result.markdown) + else: + print(f"Failed to crawl local file: {result.error_message}") + +asyncio.run(crawl_local_file()) +``` + +### Crawling Raw HTML Content + +To crawl raw HTML content, prefix the HTML string with `raw:`. + +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def crawl_raw_html(): + raw_html = "
<html><body><h1>Hello, World!</h1></body></html>
    " + raw_html_url = f"raw:{raw_html}" + + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url=raw_html_url, bypass_cache=True) + if result.success: + print("Markdown Content from Raw HTML:") + print(result.markdown) + else: + print(f"Failed to crawl raw HTML: {result.error_message}") + +asyncio.run(crawl_raw_html()) +``` + +--- + +## Complete Example + +Below is a comprehensive script that: +1. **Crawls the Wikipedia page for "Apple".** +2. **Saves the HTML content to a local file (`apple.html`).** +3. **Crawls the local HTML file and verifies the markdown length matches the original crawl.** +4. **Crawls the raw HTML content from the saved file and verifies consistency.** + +```python +import os +import sys +import asyncio +from pathlib import Path + +# Adjust the parent directory to include the crawl4ai module +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(parent_dir) + +from crawl4ai import AsyncWebCrawler + +async def main(): + # Define the URL to crawl + wikipedia_url = "https://en.wikipedia.org/wiki/apple" + + # Define the path to save the HTML file + # Save the file in the same directory as the script + script_dir = Path(__file__).parent + html_file_path = script_dir / "apple.html" + + async with AsyncWebCrawler(verbose=True) as crawler: + print("\n=== Step 1: Crawling the Wikipedia URL ===") + # Crawl the Wikipedia URL + result = await crawler.arun(url=wikipedia_url, bypass_cache=True) + + # Check if crawling was successful + if not result.success: + print(f"Failed to crawl {wikipedia_url}: {result.error_message}") + return + + # Save the HTML content to a local file + with open(html_file_path, 'w', encoding='utf-8') as f: + f.write(result.html) + print(f"Saved HTML content to {html_file_path}") + + # Store the length of the generated markdown + web_crawl_length = len(result.markdown) + print(f"Length of markdown from web crawl: {web_crawl_length}\n") + + print("=== Step 2: Crawling from the Local HTML File ===") + # Construct the file URL with 'file://' prefix + file_url = f"file://{html_file_path.resolve()}" + + # Crawl the local HTML file + local_result = await crawler.arun(url=file_url, bypass_cache=True) + + # Check if crawling was successful + if not local_result.success: + print(f"Failed to crawl local file {file_url}: {local_result.error_message}") + return + + # Store the length of the generated markdown from local file + local_crawl_length = len(local_result.markdown) + print(f"Length of markdown from local file crawl: {local_crawl_length}") + + # Compare the lengths + assert web_crawl_length == local_crawl_length, ( + f"Markdown length mismatch: Web crawl ({web_crawl_length}) != Local file crawl ({local_crawl_length})" + ) + print("✅ Markdown length matches between web crawl and local file crawl.\n") + + print("=== Step 3: Crawling Using Raw HTML Content ===") + # Read the HTML content from the saved file + with open(html_file_path, 'r', encoding='utf-8') as f: + raw_html_content = f.read() + + # Prefix the raw HTML content with 'raw:' + raw_html_url = f"raw:{raw_html_content}" + + # Crawl using the raw HTML content + raw_result = await crawler.arun(url=raw_html_url, bypass_cache=True) + + # Check if crawling was successful + if not raw_result.success: + print(f"Failed to crawl raw HTML content: {raw_result.error_message}") + return + + # Store the length of the generated markdown from raw HTML + raw_crawl_length = len(raw_result.markdown) + print(f"Length of markdown from raw HTML crawl: 
{raw_crawl_length}") + + # Compare the lengths + assert web_crawl_length == raw_crawl_length, ( + f"Markdown length mismatch: Web crawl ({web_crawl_length}) != Raw HTML crawl ({raw_crawl_length})" + ) + print("✅ Markdown length matches between web crawl and raw HTML crawl.\n") + + print("All tests passed successfully!") + + # Clean up by removing the saved HTML file + if html_file_path.exists(): + os.remove(html_file_path) + print(f"Removed the saved HTML file: {html_file_path}") + +# Run the main function +if __name__ == "__main__": + asyncio.run(main()) +``` + +### **How It Works** + +1. **Step 1: Crawl the Web URL** + - Crawls `https://en.wikipedia.org/wiki/apple`. + - Saves the HTML content to `apple.html`. + - Records the length of the generated markdown. + +2. **Step 2: Crawl from the Local HTML File** + - Uses the `file://` prefix to crawl `apple.html`. + - Ensures the markdown length matches the original web crawl. + +3. **Step 3: Crawl Using Raw HTML Content** + - Reads the HTML from `apple.html`. + - Prefixes it with `raw:` and crawls. + - Verifies the markdown length matches the previous results. + +4. **Cleanup** + - Deletes the `apple.html` file after testing. + +### **Running the Example** + +1. **Save the Script:** + - Save the above code as `test_crawl4ai.py` in your project directory. + +2. **Execute the Script:** + - Run the script using: + ```bash + python test_crawl4ai.py + ``` + +3. **Observe the Output:** + - The script will print logs detailing each step. + - Assertions ensure consistency across different crawling methods. + - Upon success, it confirms that all markdown lengths match. + +--- + +## Conclusion + +With the new prefix-based input handling in **Crawl4AI**, you can effortlessly crawl web URLs, local HTML files, and raw HTML strings using a unified `url` parameter. This enhancement simplifies the API usage and provides greater flexibility for diverse crawling scenarios. + diff --git a/docs/md_v2/basic/quickstart.md b/docs/md_v2/basic/quickstart.md new file mode 100644 index 0000000..95b8a39 --- /dev/null +++ b/docs/md_v2/basic/quickstart.md @@ -0,0 +1,297 @@ +# Quick Start Guide 🚀 + +Welcome to the Crawl4AI Quickstart Guide! In this tutorial, we'll walk you through the basic usage of Crawl4AI with a friendly and humorous tone. We'll cover everything from basic usage to advanced features like chunking and extraction strategies, all with the power of asynchronous programming. Let's dive in! 🌟 + +## Getting Started 🛠️ + +First, let's import the necessary modules and create an instance of `AsyncWebCrawler`. We'll use an async context manager, which handles the setup and teardown of the crawler for us. + +```python +import asyncio +from crawl4ai import AsyncWebCrawler, CasheMode + +async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + # We'll add our crawling code here + pass + +if __name__ == "__main__": + asyncio.run(main()) +``` + +### Basic Usage + +Simply provide a URL and let Crawl4AI do the magic! 
+ +```python +async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url="https://www.nbcnews.com/business") + print(f"Basic crawl result: {result.markdown[:500]}") # Print first 500 characters + +asyncio.run(main()) +``` + +### Taking Screenshots 📸 + +Capture screenshots of web pages easily: + +```python +async def capture_and_save_screenshot(url: str, output_path: str): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun( + url=url, + screenshot=True, + cache_mode=CacheMode.BYPASS + ) + + if result.success and result.screenshot: + import base64 + screenshot_data = base64.b64decode(result.screenshot) + with open(output_path, 'wb') as f: + f.write(screenshot_data) + print(f"Screenshot saved successfully to {output_path}") + else: + print("Failed to capture screenshot") +``` + +### Browser Selection 🌐 + +Crawl4AI supports multiple browser engines. Here's how to use different browsers: + +```python +# Use Firefox +async with AsyncWebCrawler(browser_type="firefox", verbose=True, headless=True) as crawler: + result = await crawler.arun(url="https://www.example.com", cache_mode=CacheMode.BYPASS) + +# Use WebKit +async with AsyncWebCrawler(browser_type="webkit", verbose=True, headless=True) as crawler: + result = await crawler.arun(url="https://www.example.com", cache_mode=CacheMode.BYPASS) + +# Use Chromium (default) +async with AsyncWebCrawler(verbose=True, headless=True) as crawler: + result = await crawler.arun(url="https://www.example.com", cache_mode=CacheMode.BYPASS) +``` + +### User Simulation 🎭 + +Simulate real user behavior to avoid detection: + +```python +async with AsyncWebCrawler(verbose=True, headless=True) as crawler: + result = await crawler.arun( + url="YOUR-URL-HERE", + cache_mode=CacheMode.BYPASS, + simulate_user=True, # Causes random mouse movements and clicks + override_navigator=True # Makes the browser appear more like a real user + ) +``` + +### Understanding Parameters 🧠 + +By default, Crawl4AI caches the results of your crawls. This means that subsequent crawls of the same URL will be much faster! Let's see this in action. + +```python +async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + # First crawl (caches the result) + result1 = await crawler.arun(url="https://www.nbcnews.com/business") + print(f"First crawl result: {result1.markdown[:100]}...") + + # Force to crawl again + result2 = await crawler.arun(url="https://www.nbcnews.com/business", cache_mode=CacheMode.BYPASS) + print(f"Second crawl result: {result2.markdown[:100]}...") + +asyncio.run(main()) +``` + +### Adding a Chunking Strategy 🧩 + +Let's add a chunking strategy: `RegexChunking`! This strategy splits the text based on a given regex pattern. 
+ +```python +from crawl4ai.chunking_strategy import RegexChunking + +async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun( + url="https://www.nbcnews.com/business", + chunking_strategy=RegexChunking(patterns=["\n\n"]) + ) + print(f"RegexChunking result: {result.extracted_content[:200]}...") + +asyncio.run(main()) +``` + +### Using LLMExtractionStrategy with Different Providers 🤖 + +Crawl4AI supports multiple LLM providers for extraction: + +```python +from crawl4ai.extraction_strategy import LLMExtractionStrategy +from pydantic import BaseModel, Field + +class OpenAIModelFee(BaseModel): + model_name: str = Field(..., description="Name of the OpenAI model.") + input_fee: str = Field(..., description="Fee for input token for the OpenAI model.") + output_fee: str = Field(..., description="Fee for output token for the OpenAI model.") + +# OpenAI +await extract_structured_data_using_llm("openai/gpt-4o", os.getenv("OPENAI_API_KEY")) + +# Hugging Face +await extract_structured_data_using_llm( + "huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct", + os.getenv("HUGGINGFACE_API_KEY") +) + +# Ollama +await extract_structured_data_using_llm("ollama/llama3.2") + +# With custom headers +custom_headers = { + "Authorization": "Bearer your-custom-token", + "X-Custom-Header": "Some-Value" +} +await extract_structured_data_using_llm(extra_headers=custom_headers) +``` + +### Knowledge Graph Generation 🕸️ + +Generate knowledge graphs from web content: + +```python +from pydantic import BaseModel +from typing import List + +class Entity(BaseModel): + name: str + description: str + +class Relationship(BaseModel): + entity1: Entity + entity2: Entity + description: str + relation_type: str + +class KnowledgeGraph(BaseModel): + entities: List[Entity] + relationships: List[Relationship] + +extraction_strategy = LLMExtractionStrategy( + provider='openai/gpt-4o-mini', + api_token=os.getenv('OPENAI_API_KEY'), + schema=KnowledgeGraph.model_json_schema(), + extraction_type="schema", + instruction="Extract entities and relationships from the given text." 
+) + +async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://paulgraham.com/love.html", + cache_mode=CacheMode.BYPASS, + extraction_strategy=extraction_strategy + ) +``` + +### Advanced Session-Based Crawling with Dynamic Content 🔄 + +For modern web applications with dynamic content loading, here's how to handle pagination and content updates: + +```python +async def crawl_dynamic_content(): + async with AsyncWebCrawler(verbose=True) as crawler: + url = "https://github.com/microsoft/TypeScript/commits/main" + session_id = "typescript_commits_session" + + js_next_page = """ + const button = document.querySelector('a[data-testid="pagination-next-button"]'); + if (button) button.click(); + """ + + wait_for = """() => { + const commits = document.querySelectorAll('li.Box-sc-g0xbh4-0 h4'); + if (commits.length === 0) return false; + const firstCommit = commits[0].textContent.trim(); + return firstCommit !== window.firstCommit; + }""" + + schema = { + "name": "Commit Extractor", + "baseSelector": "li.Box-sc-g0xbh4-0", + "fields": [ + { + "name": "title", + "selector": "h4.markdown-title", + "type": "text", + "transform": "strip", + }, + ], + } + extraction_strategy = JsonCssExtractionStrategy(schema, verbose=True) + + for page in range(3): # Crawl 3 pages + result = await crawler.arun( + url=url, + session_id=session_id, + css_selector="li.Box-sc-g0xbh4-0", + extraction_strategy=extraction_strategy, + js_code=js_next_page if page > 0 else None, + wait_for=wait_for if page > 0 else None, + js_only=page > 0, + cache_mode=CacheMode.BYPASS, + headless=False, + ) + + await crawler.crawler_strategy.kill_session(session_id) +``` + +### Handling Overlays and Fitting Content 📏 + +Remove overlay elements and fit content appropriately: + +```python +async with AsyncWebCrawler(headless=False) as crawler: + result = await crawler.arun( + url="your-url-here", + cache_mode=CacheMode.BYPASS, + word_count_threshold=10, + remove_overlay_elements=True, + screenshot=True + ) +``` + +## Performance Comparison 🏎️ + +Crawl4AI offers impressive performance compared to other solutions: + +```python +# Firecrawl comparison +from firecrawl import FirecrawlApp +app = FirecrawlApp(api_key=os.environ['FIRECRAWL_API_KEY']) +start = time.time() +scrape_status = app.scrape_url( + 'https://www.nbcnews.com/business', + params={'formats': ['markdown', 'html']} +) +end = time.time() + +# Crawl4AI comparison +async with AsyncWebCrawler() as crawler: + start = time.time() + result = await crawler.arun( + url="https://www.nbcnews.com/business", + word_count_threshold=0, + cache_mode=CacheMode.BYPASS, + verbose=False, + ) + end = time.time() +``` + +Note: Performance comparisons should be conducted in environments with stable and fast internet connections for accurate results. + +## Congratulations! 🎉 + +You've made it through the updated Crawl4AI Quickstart Guide! Now you're equipped with even more powerful features to crawl the web asynchronously like a pro! 🕸️ + +Happy crawling! 🚀 \ No newline at end of file diff --git a/docs/md_v2/basic/simple-crawling.md b/docs/md_v2/basic/simple-crawling.md new file mode 100644 index 0000000..871fa64 --- /dev/null +++ b/docs/md_v2/basic/simple-crawling.md @@ -0,0 +1,122 @@ +# Simple Crawling + +This guide covers the basics of web crawling with Crawl4AI. You'll learn how to set up a crawler, make your first request, and understand the response. 
+ +## Basic Usage + +Here's the simplest way to crawl a webpage: + +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def main(): + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com" + ) + print(result.markdown) # Print clean markdown content + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Understanding the Response + +The `arun()` method returns a `CrawlResult` object with several useful properties. Here's a quick overview (see [CrawlResult](../api/crawl-result.md) for complete details): + +```python +result = await crawler.arun(url="https://example.com", fit_markdown=True) + +# Different content formats +print(result.html) # Raw HTML +print(result.cleaned_html) # Cleaned HTML +print(result.markdown) # Markdown version +print(result.fit_markdown) # Most relevant content in markdown + +# Check success status +print(result.success) # True if crawl succeeded +print(result.status_code) # HTTP status code (e.g., 200, 404) + +# Access extracted media and links +print(result.media) # Dictionary of found media (images, videos, audio) +print(result.links) # Dictionary of internal and external links +``` + +## Adding Basic Options + +Customize your crawl with these common options: + +```python +result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Minimum words per content block + exclude_external_links=True, # Remove external links + remove_overlay_elements=True, # Remove popups/modals + process_iframes=True # Process iframe content +) +``` + +## Handling Errors + +Always check if the crawl was successful: + +```python +result = await crawler.arun(url="https://example.com") +if not result.success: + print(f"Crawl failed: {result.error_message}") + print(f"Status code: {result.status_code}") +``` + +## Logging and Debugging + +Enable verbose mode for detailed logging: + +```python +async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url="https://example.com") +``` + +## Complete Example + +Here's a more comprehensive example showing common usage patterns: + +```python +import asyncio +from crawl4ai import AsyncWebCrawler, CacheMode + +async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun( + url="https://example.com", + # Content filtering + word_count_threshold=10, + excluded_tags=['form', 'header'], + exclude_external_links=True, + + # Content processing + process_iframes=True, + remove_overlay_elements=True, + + # Cache control + cache_mode=CacheMode.ENABLE # Use cache if available + ) + + if result.success: + # Print clean content + print("Content:", result.markdown[:500]) # First 500 chars + + # Process images + for image in result.media["images"]: + print(f"Found image: {image['src']}") + + # Process links + for link in result.links["internal"]: + print(f"Internal link: {link['href']}") + + else: + print(f"Crawl failed: {result.error_message}") + +if __name__ == "__main__": + asyncio.run(main()) +``` diff --git a/docs/md _sync/full_details/chunking_strategies.md b/docs/md_v2/extraction/chunking.md similarity index 100% rename from docs/md _sync/full_details/chunking_strategies.md rename to docs/md_v2/extraction/chunking.md diff --git a/docs/md_v2/extraction/cosine.md b/docs/md_v2/extraction/cosine.md new file mode 100644 index 0000000..9ce49e4 --- /dev/null +++ b/docs/md_v2/extraction/cosine.md @@ -0,0 +1,222 @@ +# Cosine Strategy + +The Cosine Strategy in Crawl4AI uses similarity-based clustering 
to identify and extract relevant content sections from web pages. This strategy is particularly useful when you need to find and extract content based on semantic similarity rather than structural patterns. + +## How It Works + +The Cosine Strategy: +1. Breaks down page content into meaningful chunks +2. Converts text into vector representations +3. Calculates similarity between chunks +4. Clusters similar content together +5. Ranks and filters content based on relevance + +## Basic Usage + +```python +from crawl4ai.extraction_strategy import CosineStrategy + +strategy = CosineStrategy( + semantic_filter="product reviews", # Target content type + word_count_threshold=10, # Minimum words per cluster + sim_threshold=0.3 # Similarity threshold +) + +async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com/reviews", + extraction_strategy=strategy + ) + + content = result.extracted_content +``` + +## Configuration Options + +### Core Parameters + +```python +CosineStrategy( + # Content Filtering + semantic_filter: str = None, # Keywords/topic for content filtering + word_count_threshold: int = 10, # Minimum words per cluster + sim_threshold: float = 0.3, # Similarity threshold (0.0 to 1.0) + + # Clustering Parameters + max_dist: float = 0.2, # Maximum distance for clustering + linkage_method: str = 'ward', # Clustering linkage method + top_k: int = 3, # Number of top categories to extract + + # Model Configuration + model_name: str = 'sentence-transformers/all-MiniLM-L6-v2', # Embedding model + + verbose: bool = False # Enable logging +) +``` + +### Parameter Details + +1. **semantic_filter** + - Sets the target topic or content type + - Use keywords relevant to your desired content + - Example: "technical specifications", "user reviews", "pricing information" + +2. **sim_threshold** + - Controls how similar content must be to be grouped together + - Higher values (e.g., 0.8) mean stricter matching + - Lower values (e.g., 0.3) allow more variation + ```python + # Strict matching + strategy = CosineStrategy(sim_threshold=0.8) + + # Loose matching + strategy = CosineStrategy(sim_threshold=0.3) + ``` + +3. **word_count_threshold** + - Filters out short content blocks + - Helps eliminate noise and irrelevant content + ```python + # Only consider substantial paragraphs + strategy = CosineStrategy(word_count_threshold=50) + ``` + +4. **top_k** + - Number of top content clusters to return + - Higher values return more diverse content + ```python + # Get top 5 most relevant content clusters + strategy = CosineStrategy(top_k=5) + ``` + +## Use Cases + +### 1. Article Content Extraction +```python +strategy = CosineStrategy( + semantic_filter="main article content", + word_count_threshold=100, # Longer blocks for articles + top_k=1 # Usually want single main content +) + +result = await crawler.arun( + url="https://example.com/blog/post", + extraction_strategy=strategy +) +``` + +### 2. Product Review Analysis +```python +strategy = CosineStrategy( + semantic_filter="customer reviews and ratings", + word_count_threshold=20, # Reviews can be shorter + top_k=10, # Get multiple reviews + sim_threshold=0.4 # Allow variety in review content +) +``` + +### 3. 
Technical Documentation +```python +strategy = CosineStrategy( + semantic_filter="technical specifications documentation", + word_count_threshold=30, + sim_threshold=0.6, # Stricter matching for technical content + max_dist=0.3 # Allow related technical sections +) +``` + +## Advanced Features + +### Custom Clustering +```python +strategy = CosineStrategy( + linkage_method='complete', # Alternative clustering method + max_dist=0.4, # Larger clusters + model_name='sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2' # Multilingual support +) +``` + +### Content Filtering Pipeline +```python +strategy = CosineStrategy( + semantic_filter="pricing plans features", + word_count_threshold=15, + sim_threshold=0.5, + top_k=3 +) + +async def extract_pricing_features(url: str): + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url=url, + extraction_strategy=strategy + ) + + if result.success: + content = json.loads(result.extracted_content) + return { + 'pricing_features': content, + 'clusters': len(content), + 'similarity_scores': [item['score'] for item in content] + } +``` + +## Best Practices + +1. **Adjust Thresholds Iteratively** + - Start with default values + - Adjust based on results + - Monitor clustering quality + +2. **Choose Appropriate Word Count Thresholds** + - Higher for articles (100+) + - Lower for reviews/comments (20+) + - Medium for product descriptions (50+) + +3. **Optimize Performance** + ```python + strategy = CosineStrategy( + word_count_threshold=10, # Filter early + top_k=5, # Limit results + verbose=True # Monitor performance + ) + ``` + +4. **Handle Different Content Types** + ```python + # For mixed content pages + strategy = CosineStrategy( + semantic_filter="product features", + sim_threshold=0.4, # More flexible matching + max_dist=0.3, # Larger clusters + top_k=3 # Multiple relevant sections + ) + ``` + +## Error Handling + +```python +try: + result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy + ) + + if result.success: + content = json.loads(result.extracted_content) + if not content: + print("No relevant content found") + else: + print(f"Extraction failed: {result.error_message}") + +except Exception as e: + print(f"Error during extraction: {str(e)}") +``` + +The Cosine Strategy is particularly effective when: +- Content structure is inconsistent +- You need semantic understanding +- You want to find similar content blocks +- Structure-based extraction (CSS/XPath) isn't reliable + +It works well with other strategies and can be used as a pre-processing step for LLM-based extraction. \ No newline at end of file diff --git a/docs/md/full_details/advanced_jsoncss_extraction.md b/docs/md_v2/extraction/css-advanced.md similarity index 100% rename from docs/md/full_details/advanced_jsoncss_extraction.md rename to docs/md_v2/extraction/css-advanced.md diff --git a/docs/md/examples/json_css_extraction.md b/docs/md_v2/extraction/css.md similarity index 98% rename from docs/md/examples/json_css_extraction.md rename to docs/md_v2/extraction/css.md index 933d1f6..3b5075a 100644 --- a/docs/md/examples/json_css_extraction.md +++ b/docs/md_v2/extraction/css.md @@ -139,4 +139,4 @@ This advanced example demonstrates how to: By mastering the `JsonCssExtractionStrategy`, you can efficiently extract structured data from a wide variety of web pages, making it a valuable tool in your web scraping toolkit. 
-For more details on schema definitions and advanced extraction strategies, check out the[Advanced JsonCssExtraction](../full_details/advanced_jsoncss_extraction.md). \ No newline at end of file +For more details on schema definitions and advanced extraction strategies, check out the[Advanced JsonCssExtraction](./css-advanced.md). \ No newline at end of file diff --git a/docs/md/examples/llm_extraction.md b/docs/md_v2/extraction/llm.md similarity index 98% rename from docs/md/examples/llm_extraction.md rename to docs/md_v2/extraction/llm.md index f4bd74c..ca56214 100644 --- a/docs/md/examples/llm_extraction.md +++ b/docs/md_v2/extraction/llm.md @@ -27,7 +27,7 @@ async def extract_openai_fees(): url=url, word_count_threshold=1, extraction_strategy=LLMExtractionStrategy( - provider="openai/gpt-4o", + provider="openai/gpt-4o", # Or use ollama like provider="ollama/nemotron" api_token=os.getenv('OPENAI_API_KEY'), schema=OpenAIModelFee.model_json_schema(), extraction_type="schema", diff --git a/docs/md_v2/extraction/overview.md b/docs/md_v2/extraction/overview.md new file mode 100644 index 0000000..53a8b87 --- /dev/null +++ b/docs/md_v2/extraction/overview.md @@ -0,0 +1,197 @@ +# Extraction Strategies Overview + +Crawl4AI provides powerful extraction strategies to help you get structured data from web pages. Each strategy is designed for specific use cases and offers different approaches to data extraction. + +## Available Strategies + +### [LLM-Based Extraction](llm.md) + +`LLMExtractionStrategy` uses Language Models to extract structured data from web content. This approach is highly flexible and can understand content semantically. + +```python +from pydantic import BaseModel +from crawl4ai.extraction_strategy import LLMExtractionStrategy + +class Product(BaseModel): + name: str + price: float + description: str + +strategy = LLMExtractionStrategy( + provider="ollama/llama2", + schema=Product.schema(), + instruction="Extract product details from the page" +) + +result = await crawler.arun( + url="https://example.com/product", + extraction_strategy=strategy +) +``` + +**Best for:** +- Complex data structures +- Content requiring interpretation +- Flexible content formats +- Natural language processing + +### [CSS-Based Extraction](css.md) + +`JsonCssExtractionStrategy` extracts data using CSS selectors. This is fast, reliable, and perfect for consistently structured pages. + +```python +from crawl4ai.extraction_strategy import JsonCssExtractionStrategy + +schema = { + "name": "Product Listing", + "baseSelector": ".product-card", + "fields": [ + {"name": "title", "selector": "h2", "type": "text"}, + {"name": "price", "selector": ".price", "type": "text"}, + {"name": "image", "selector": "img", "type": "attribute", "attribute": "src"} + ] +} + +strategy = JsonCssExtractionStrategy(schema) + +result = await crawler.arun( + url="https://example.com/products", + extraction_strategy=strategy +) +``` + +**Best for:** +- E-commerce product listings +- News article collections +- Structured content pages +- High-performance needs + +### [Cosine Strategy](cosine.md) + +`CosineStrategy` uses similarity-based clustering to identify and extract relevant content sections. 
+ +```python +from crawl4ai.extraction_strategy import CosineStrategy + +strategy = CosineStrategy( + semantic_filter="product reviews", # Content focus + word_count_threshold=10, # Minimum words per cluster + sim_threshold=0.3, # Similarity threshold + max_dist=0.2, # Maximum cluster distance + top_k=3 # Number of top clusters to extract +) + +result = await crawler.arun( + url="https://example.com/reviews", + extraction_strategy=strategy +) +``` + +**Best for:** +- Content similarity analysis +- Topic clustering +- Relevant content extraction +- Pattern recognition in text + +## Strategy Selection Guide + +Choose your strategy based on these factors: + +1. **Content Structure** + - Well-structured HTML → Use CSS Strategy + - Natural language text → Use LLM Strategy + - Mixed/Complex content → Use Cosine Strategy + +2. **Performance Requirements** + - Fastest: CSS Strategy + - Moderate: Cosine Strategy + - Variable: LLM Strategy (depends on provider) + +3. **Accuracy Needs** + - Highest structure accuracy: CSS Strategy + - Best semantic understanding: LLM Strategy + - Best content relevance: Cosine Strategy + +## Combining Strategies + +You can combine strategies for more powerful extraction: + +```python +# First use CSS strategy for initial structure +css_result = await crawler.arun( + url="https://example.com", + extraction_strategy=css_strategy +) + +# Then use LLM for semantic analysis +llm_result = await crawler.arun( + url="https://example.com", + extraction_strategy=llm_strategy +) +``` + +## Common Use Cases + +1. **E-commerce Scraping** + ```python + # CSS Strategy for product listings + schema = { + "name": "Products", + "baseSelector": ".product", + "fields": [ + {"name": "name", "selector": ".title", "type": "text"}, + {"name": "price", "selector": ".price", "type": "text"} + ] + } + ``` + +2. **News Article Extraction** + ```python + # LLM Strategy for article content + class Article(BaseModel): + title: str + content: str + author: str + date: str + + strategy = LLMExtractionStrategy( + provider="ollama/llama2", + schema=Article.schema() + ) + ``` + +3. **Content Analysis** + ```python + # Cosine Strategy for topic analysis + strategy = CosineStrategy( + semantic_filter="technology trends", + top_k=5 + ) + ``` + +## Best Practices + +1. **Choose the Right Strategy** + - Start with CSS for structured data + - Use LLM for complex interpretation + - Try Cosine for content relevance + +2. **Optimize Performance** + - Cache LLM results + - Keep CSS selectors specific + - Tune similarity thresholds + +3. **Handle Errors** + ```python + result = await crawler.arun( + url="https://example.com", + extraction_strategy=strategy + ) + + if not result.success: + print(f"Extraction failed: {result.error_message}") + else: + data = json.loads(result.extracted_content) + ``` + +Each strategy has its strengths and optimal use cases. Explore the detailed documentation for each strategy to learn more about their specific features and configurations. \ No newline at end of file diff --git a/docs/md_v2/index.md b/docs/md_v2/index.md new file mode 100644 index 0000000..65ea6da --- /dev/null +++ b/docs/md_v2/index.md @@ -0,0 +1,113 @@ +# Crawl4AI + +Welcome to the official documentation for Crawl4AI! 🕷️🤖 Crawl4AI is an open-source Python library designed to simplify web crawling and extract useful information from web pages. This documentation will guide you through the features, usage, and customization of Crawl4AI. 
+ +## Introduction + +Crawl4AI has one clear task: to make crawling and data extraction from web pages easy and efficient, especially for large language models (LLMs) and AI applications. Whether you are using it as a REST API or a Python library, Crawl4AI offers a robust and flexible solution with full asynchronous support. + +## Quick Start + +Here's a quick example to show you how easy it is to use Crawl4AI with its asynchronous capabilities: + +```python +import asyncio +from crawl4ai import AsyncWebCrawler + +async def main(): + # Create an instance of AsyncWebCrawler + async with AsyncWebCrawler(verbose=True) as crawler: + # Run the crawler on a URL + result = await crawler.arun(url="https://www.nbcnews.com/business") + + # Print the extracted content + print(result.markdown) + +# Run the async main function +asyncio.run(main()) +``` + +## Key Features ✨ + +- 🆓 Completely free and open-source +- 🚀 Blazing fast performance, outperforming many paid services +- 🤖 LLM-friendly output formats (JSON, cleaned HTML, markdown) +- 📄 Fit markdown generation for extracting main article content. +- 🌐 Multi-browser support (Chromium, Firefox, WebKit) +- 🌍 Supports crawling multiple URLs simultaneously +- 🎨 Extracts and returns all media tags (Images, Audio, and Video) +- 🔗 Extracts all external and internal links +- 📚 Extracts metadata from the page +- 🔄 Custom hooks for authentication, headers, and page modifications +- 🕵️ User-agent customization +- 🖼️ Takes screenshots of pages with enhanced error handling +- 📜 Executes multiple custom JavaScripts before crawling +- 📊 Generates structured output without LLM using JsonCssExtractionStrategy +- 📚 Various chunking strategies: topic-based, regex, sentence, and more +- 🧠 Advanced extraction strategies: cosine clustering, LLM, and more +- 🎯 CSS selector support for precise data extraction +- 📝 Passes instructions/keywords to refine extraction +- 🔒 Proxy support with authentication for enhanced access +- 🔄 Session management for complex multi-page crawling +- 🌐 Asynchronous architecture for improved performance +- 🖼️ Improved image processing with lazy-loading detection +- 🕰️ Enhanced handling of delayed content loading +- 🔑 Custom headers support for LLM interactions +- 🖼️ iframe content extraction for comprehensive analysis +- ⏱️ Flexible timeout and delayed content retrieval options + +## Documentation Structure + +Our documentation is organized into several sections: + +### Basic Usage +- [Installation](basic/installation.md) +- [Quick Start](basic/quickstart.md) +- [Simple Crawling](basic/simple-crawling.md) +- [Browser Configuration](basic/browser-config.md) +- [Content Selection](basic/content-selection.md) +- [Output Formats](basic/output-formats.md) +- [Page Interaction](basic/page-interaction.md) + +### Advanced Features +- [Magic Mode](advanced/magic-mode.md) +- [Session Management](advanced/session-management.md) +- [Hooks & Authentication](advanced/hooks-auth.md) +- [Proxy & Security](advanced/proxy-security.md) +- [Content Processing](advanced/content-processing.md) + +### Extraction & Processing +- [Extraction Strategies Overview](extraction/overview.md) +- [LLM Integration](extraction/llm.md) +- [CSS-Based Extraction](extraction/css.md) +- [Cosine Strategy](extraction/cosine.md) +- [Chunking Strategies](extraction/chunking.md) + +### API Reference +- [AsyncWebCrawler](api/async-webcrawler.md) +- [CrawlResult](api/crawl-result.md) +- [Extraction Strategies](api/strategies.md) +- [arun() Method Parameters](api/arun.md) + +### Examples +- 
Coming soon! + +## Getting Started + +1. Install Crawl4AI: +```bash +pip install crawl4ai +``` + +2. Check out our [Quick Start Guide](basic/quickstart.md) to begin crawling web pages. + +3. Explore our [examples](https://github.com/unclecode/crawl4ai/tree/main/docs/examples) to see Crawl4AI in action. + +## Support + +For questions, suggestions, or issues: +- GitHub Issues: [Report a Bug](https://github.com/unclecode/crawl4ai/issues) +- Twitter: [@unclecode](https://twitter.com/unclecode) +- Website: [crawl4ai.com](https://crawl4ai.com) + +Happy Crawling! 🕸️🚀 \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_01_Introduction_to_Crawl4AI_and_Basic_Installation.md b/docs/md_v2/tutorial/episode_01_Introduction_to_Crawl4AI_and_Basic_Installation.md new file mode 100644 index 0000000..fb1846b --- /dev/null +++ b/docs/md_v2/tutorial/episode_01_Introduction_to_Crawl4AI_and_Basic_Installation.md @@ -0,0 +1,51 @@ +# Crawl4AI + +## Episode 1: Introduction to Crawl4AI and Basic Installation + +### Quick Intro +Walk through installation from PyPI, setup, and verification. Show how to install with options like `torch` or `transformer` for advanced capabilities. + +Here's a condensed outline of the **Installation and Setup** video content: + +--- + +1) **Introduction to Crawl4AI**: Briefly explain that Crawl4AI is a powerful tool for web scraping, data extraction, and content processing, with customizable options for various needs. + +2) **Installation Overview**: + + - **Basic Install**: Run `pip install crawl4ai` and `playwright install` (to set up browser dependencies). + + - **Optional Advanced Installs**: + - `pip install crawl4ai[torch]` - Adds PyTorch for clustering. + - `pip install crawl4ai[transformer]` - Adds support for LLM-based extraction. + - `pip install crawl4ai[all]` - Installs all features for complete functionality. + +3) **Verifying the Installation**: + + - Walk through a simple test script to confirm the setup: + ```python + import asyncio + from crawl4ai import AsyncWebCrawler + + async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url="https://www.example.com") + print(result.markdown[:500]) # Show first 500 characters + + asyncio.run(main()) + ``` + - Explain that this script initializes the crawler and runs it on a test URL, displaying part of the extracted content to verify functionality. + +4) **Important Tips**: + + - **Run** `playwright install` **after installation** to set up dependencies. + - **For full performance** on text-related tasks, run `crawl4ai-download-models` after installing with `[torch]`, `[transformer]`, or `[all]` options. + - If you encounter issues, refer to the documentation or GitHub issues. + +5) **Wrap Up**: + + - Introduce the next topic in the series, which will cover Crawl4AI's browser configuration options (like choosing between `chromium`, `firefox`, and `webkit`). + +--- + +This structure provides a concise, effective guide to get viewers up and running with Crawl4AI in minutes. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_02_Overview_of_Advanced_Features.md b/docs/md_v2/tutorial/episode_02_Overview_of_Advanced_Features.md new file mode 100644 index 0000000..c4fd09d --- /dev/null +++ b/docs/md_v2/tutorial/episode_02_Overview_of_Advanced_Features.md @@ -0,0 +1,78 @@ +# Crawl4AI + +## Episode 2: Overview of Advanced Features + +### Quick Intro +A general overview of advanced features like hooks, CSS selectors, and JSON CSS extraction. 
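The outline below demonstrates plain CSS selectors via `css_selector`; for the JSON CSS extraction mentioned in the intro, a minimal sketch looks like the following (the URL, selectors, and field names are illustrative placeholders, not a specific site):

```python
import asyncio
from crawl4ai import AsyncWebCrawler
from crawl4ai.extraction_strategy import JsonCssExtractionStrategy

# Illustrative schema: one extracted object per element matching baseSelector
schema = {
    "name": "Articles",
    "baseSelector": "article.post",
    "fields": [
        {"name": "title", "selector": "h2", "type": "text"},
        {"name": "link", "selector": "a", "type": "attribute", "attribute": "href"},
    ],
}

async def main():
    async with AsyncWebCrawler(verbose=True) as crawler:
        result = await crawler.arun(
            url="https://www.example.com/blog",
            extraction_strategy=JsonCssExtractionStrategy(schema),
        )
        print(result.extracted_content)  # JSON string with one object per matched element

asyncio.run(main())
```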
+ +Here's a condensed outline for an **Overview of Advanced Features** video covering Crawl4AI's powerful customization and extraction options: + +--- + +### **Overview of Advanced Features** + +1) **Introduction to Advanced Features**: + + - Briefly introduce Crawl4AI’s advanced tools, which let users go beyond basic crawling to customize and fine-tune their scraping workflows. + +2) **Taking Screenshots**: + + - Explain the screenshot capability for capturing page state and verifying content. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", screenshot=True) + ``` + - Mention that screenshots are saved as a base64 string in `result`, allowing easy decoding and saving. + +3) **Media and Link Extraction**: + + - Demonstrate how to pull all media (images, videos) and links (internal and external) from a page for deeper analysis or content gathering. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com") + print("Media:", result.media) + print("Links:", result.links) + ``` + +4) **Custom User Agent**: + + - Show how to set a custom user agent to disguise the crawler or simulate specific devices/browsers. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", user_agent="Mozilla/5.0 (compatible; MyCrawler/1.0)") + ``` + +5) **Custom Hooks for Enhanced Control**: + + - Briefly cover how to use hooks, which allow custom actions like setting headers or handling login during the crawl. + - **Example**: Setting a custom header with `before_get_url` hook. + ```python + async def before_get_url(page): + await page.set_extra_http_headers({"X-Test-Header": "test"}) + ``` + +6) **CSS Selectors for Targeted Extraction**: + + - Explain the use of CSS selectors to extract specific elements, ideal for structured data like articles or product details. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", css_selector="h2") + print("H2 Tags:", result.extracted_content) + ``` + +7) **Crawling Inside Iframes**: + + - Mention how enabling `process_iframes=True` allows extracting content within iframes, useful for sites with embedded content or ads. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", process_iframes=True) + ``` + +8) **Wrap-Up**: + + - Summarize these advanced features and how they allow users to customize every part of their web scraping experience. + - Tease upcoming videos where each feature will be explored in detail. + +--- + +This covers each advanced feature with a brief example, providing a useful overview to prepare viewers for the more in-depth videos. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_03_Browser_Configurations_&_Headless_Crawling.md b/docs/md_v2/tutorial/episode_03_Browser_Configurations_&_Headless_Crawling.md new file mode 100644 index 0000000..45f1a35 --- /dev/null +++ b/docs/md_v2/tutorial/episode_03_Browser_Configurations_&_Headless_Crawling.md @@ -0,0 +1,65 @@ +# Crawl4AI + +## Episode 3: Browser Configurations & Headless Crawling + +### Quick Intro +Explain browser options (`chromium`, `firefox`, `webkit`) and settings for headless mode, caching, and verbose logging. + +Here’s a streamlined outline for the **Browser Configurations & Headless Crawling** video: + +--- + +### **Browser Configurations & Headless Crawling** + +1) **Overview of Browser Options**: + + - Crawl4AI supports three browser engines: + - **Chromium** (default) - Highly compatible. 
+ - **Firefox** - Great for specialized use cases. + - **Webkit** - Lightweight, ideal for basic needs. + - **Example**: + ```python + # Using Chromium (default) + crawler = AsyncWebCrawler(browser_type="chromium") + + # Using Firefox + crawler = AsyncWebCrawler(browser_type="firefox") + + # Using WebKit + crawler = AsyncWebCrawler(browser_type="webkit") + ``` + +2) **Headless Mode**: + + - Headless mode runs the browser without a visible GUI, making it faster and less resource-intensive. + - To enable or disable: + ```python + # Headless mode (default is True) + crawler = AsyncWebCrawler(headless=True) + + # Disable headless mode for debugging + crawler = AsyncWebCrawler(headless=False) + ``` + +3) **Verbose Logging**: + - Use `verbose=True` to get detailed logs for each action, useful for debugging: + ```python + crawler = AsyncWebCrawler(verbose=True) + ``` + +4) **Running a Basic Crawl with Configuration**: + - Example of a simple crawl with custom browser settings: + ```python + async with AsyncWebCrawler(browser_type="firefox", headless=True, verbose=True) as crawler: + result = await crawler.arun(url="https://www.example.com") + print(result.markdown[:500]) # Show first 500 characters + ``` + - This example uses Firefox in headless mode with logging enabled, demonstrating the flexibility of Crawl4AI’s setup. + +5) **Recap & Next Steps**: + - Recap the power of selecting different browsers and running headless mode for speed and efficiency. + - Tease the next video: **Proxy & Security Settings** for navigating blocked or restricted content and protecting IP identity. + +--- + +This breakdown covers browser configuration essentials in Crawl4AI, providing users with practical steps to optimize their scraping setup. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_04_Advanced_Proxy_and_Security_Settings.md b/docs/md_v2/tutorial/episode_04_Advanced_Proxy_and_Security_Settings.md new file mode 100644 index 0000000..ea23596 --- /dev/null +++ b/docs/md_v2/tutorial/episode_04_Advanced_Proxy_and_Security_Settings.md @@ -0,0 +1,90 @@ +# Crawl4AI + +## Episode 4: Advanced Proxy and Security Settings + +### Quick Intro +Showcase proxy configurations (HTTP, SOCKS5, authenticated proxies). Demo: Use rotating proxies and set custom headers to avoid IP blocking and enhance security. + +Here’s a focused outline for the **Proxy and Security Settings** video: + +--- + +### **Proxy & Security Settings** + +1) **Why Use Proxies in Web Crawling**: + + - Proxies are essential for bypassing IP-based restrictions, improving anonymity, and managing rate limits. + - Crawl4AI supports simple proxies, authenticated proxies, and proxy rotation for robust web scraping. 
+ +2) **Basic Proxy Setup**: + + - **Using a Simple Proxy**: + ```python + # HTTP proxy + crawler = AsyncWebCrawler(proxy="http://proxy.example.com:8080") + + # SOCKS proxy + crawler = AsyncWebCrawler(proxy="socks5://proxy.example.com:1080") + ``` + +3) **Authenticated Proxies**: + + - Use `proxy_config` for proxies requiring a username and password: + ```python + proxy_config = { + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" + } + crawler = AsyncWebCrawler(proxy_config=proxy_config) + ``` + +4) **Rotating Proxies**: + + - Rotating proxies helps avoid IP bans by switching IP addresses for each request: + ```python + async def get_next_proxy(): + # Define proxy rotation logic here + return {"server": "http://next.proxy.com:8080"} + + async with AsyncWebCrawler() as crawler: + for url in urls: + proxy = await get_next_proxy() + crawler.update_proxy(proxy) + result = await crawler.arun(url=url) + ``` + - This setup periodically switches the proxy for enhanced security and access. + +5) **Custom Headers for Additional Security**: + + - Set custom headers to mask the crawler’s identity and avoid detection: + ```python + headers = { + "X-Forwarded-For": "203.0.113.195", + "Accept-Language": "en-US,en;q=0.9", + "Cache-Control": "no-cache", + "Pragma": "no-cache" + } + crawler = AsyncWebCrawler(headers=headers) + ``` + +6) **Combining Proxies with Magic Mode for Anti-Bot Protection**: + + - For sites with aggressive bot detection, combine `proxy` settings with `magic=True`: + ```python + async with AsyncWebCrawler(proxy="http://proxy.example.com:8080", headers={"Accept-Language": "en-US"}) as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Enables anti-detection features + ) + ``` + - **Magic Mode** automatically enables user simulation, random timing, and browser property masking. + +7) **Wrap Up & Next Steps**: + + - Summarize the importance of proxies and anti-detection in accessing restricted content and avoiding bans. + - Tease the next video: **JavaScript Execution and Handling Dynamic Content** for working with interactive and dynamically loaded pages. + +--- + +This outline provides a practical guide to setting up proxies and security configurations, empowering users to navigate restricted sites while staying undetected. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_05_JavaScript_Execution_and_Dynamic_Content_Handling.md b/docs/md_v2/tutorial/episode_05_JavaScript_Execution_and_Dynamic_Content_Handling.md new file mode 100644 index 0000000..98d0968 --- /dev/null +++ b/docs/md_v2/tutorial/episode_05_JavaScript_Execution_and_Dynamic_Content_Handling.md @@ -0,0 +1,97 @@ +# Crawl4AI + +## Episode 5: JavaScript Execution and Dynamic Content Handling + +### Quick Intro +Explain JavaScript code injection with examples (e.g., simulating scrolling, clicking ‘load more’). Demo: Extract content from a page that uses dynamic loading with lazy-loaded images. + +Here’s a focused outline for the **JavaScript Execution and Dynamic Content Handling** video: + +--- + +### **JavaScript Execution & Dynamic Content Handling** + +1) **Why JavaScript Execution Matters**: + + - Many modern websites load content dynamically via JavaScript, requiring special handling to access all elements. + - Crawl4AI can execute JavaScript on pages, enabling it to interact with elements like “load more” buttons, infinite scrolls, and content that appears only after certain actions. 
+ +2) **Basic JavaScript Execution**: + + - Use `js_code` to execute JavaScript commands on a page: + ```python + # Scroll to bottom of the page + result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);" + ) + ``` + - This command scrolls to the bottom, triggering any lazy-loaded or dynamically added content. + +3) **Multiple Commands & Simulating Clicks**: + + - Combine multiple JavaScript commands to interact with elements like “load more” buttons: + ```python + js_commands = [ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ] + result = await crawler.arun( + url="https://example.com", + js_code=js_commands + ) + ``` + - This script scrolls down and then clicks the “load more” button, useful for loading additional content blocks. + +4) **Waiting for Dynamic Content**: + + - Use `wait_for` to ensure the page loads specific elements before proceeding: + ```python + result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.dynamic-content" # Wait for elements with class `.dynamic-content` + ) + ``` + - This example waits until elements with `.dynamic-content` are loaded, helping to capture content that appears after JavaScript actions. + +5) **Handling Complex Dynamic Content (e.g., Infinite Scroll)**: + + - Combine JavaScript execution with conditional waiting to handle infinite scrolls or paginated content: + ```python + result = await crawler.arun( + url="https://example.com", + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "const loadMore = document.querySelector('.load-more'); if (loadMore) loadMore.click();" + ], + wait_for="js:() => document.querySelectorAll('.item').length > 10" # Wait until 10 items are loaded + ) + ``` + - This example scrolls and clicks "load more" repeatedly, waiting each time for a specified number of items to load. + +6) **Complete Example: Dynamic Content Handling with Extraction**: + + - Full example demonstrating a dynamic load and content extraction in one process: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ], + wait_for="css:.main-content", + css_selector=".main-content" + ) + print(result.markdown[:500]) # Output the main content extracted + ``` + +7) **Wrap Up & Next Steps**: + + - Recap how JavaScript execution allows access to dynamic content, enabling powerful interactions. + - Tease the next video: **Content Cleaning and Fit Markdown** to show how Crawl4AI can extract only the most relevant content from complex pages. + +--- + +This outline explains how to handle dynamic content and JavaScript-based interactions effectively, enabling users to scrape and interact with complex, modern websites. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_06_Magic_Mode_and_Anti-Bot_Protection.md b/docs/md_v2/tutorial/episode_06_Magic_Mode_and_Anti-Bot_Protection.md new file mode 100644 index 0000000..dfc3e5a --- /dev/null +++ b/docs/md_v2/tutorial/episode_06_Magic_Mode_and_Anti-Bot_Protection.md @@ -0,0 +1,86 @@ +# Crawl4AI + +## Episode 6: Magic Mode and Anti-Bot Protection + +### Quick Intro +Highlight `Magic Mode` and anti-bot features like user simulation, navigator overrides, and timing randomization. 
Demo: Access a site with anti-bot protection and show how `Magic Mode` seamlessly handles it. + +Here’s a concise outline for the **Magic Mode and Anti-Bot Protection** video: + +--- + +### **Magic Mode & Anti-Bot Protection** + +1) **Why Anti-Bot Protection is Important**: + + - Many websites use bot detection mechanisms to block automated scraping. Crawl4AI’s anti-detection features help avoid IP bans, CAPTCHAs, and access restrictions. + - **Magic Mode** is a one-step solution to enable a range of anti-bot features without complex configuration. + +2) **Enabling Magic Mode**: + + - Simply set `magic=True` to activate Crawl4AI’s full anti-bot suite: + ```python + result = await crawler.arun( + url="https://example.com", + magic=True # Enables all anti-detection features + ) + ``` + - This enables a blend of stealth techniques, including masking automation signals, randomizing timings, and simulating real user behavior. + +3) **What Magic Mode Does Behind the Scenes**: + + - **User Simulation**: Mimics human actions like mouse movements and scrolling. + - **Navigator Overrides**: Hides signals that indicate an automated browser. + - **Timing Randomization**: Adds random delays to simulate natural interaction patterns. + - **Cookie Handling**: Accepts and manages cookies dynamically to avoid triggers from cookie pop-ups. + +4) **Manual Anti-Bot Options (If Not Using Magic Mode)**: + + - For granular control, you can configure individual settings without Magic Mode: + ```python + result = await crawler.arun( + url="https://example.com", + simulate_user=True, # Enables human-like behavior + override_navigator=True # Hides automation fingerprints + ) + ``` + - **Use Cases**: This approach allows more specific adjustments when certain anti-bot features are needed but others are not. + +5) **Combining Proxies with Magic Mode**: + + - To avoid rate limits or IP blocks, combine Magic Mode with a proxy: + ```python + async with AsyncWebCrawler( + proxy="http://proxy.example.com:8080", + headers={"Accept-Language": "en-US"} + ) as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Full anti-detection + ) + ``` + - This setup maximizes stealth by pairing anti-bot detection with IP obfuscation. + +6) **Example of Anti-Bot Protection in Action**: + + - Full example with Magic Mode and proxies to scrape a protected page: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com/protected-content", + magic=True, + proxy="http://proxy.example.com:8080", + wait_for="css:.content-loaded" # Wait for the main content to load + ) + print(result.markdown[:500]) # Display first 500 characters of the content + ``` + - This example ensures seamless access to protected content by combining anti-detection and waiting for full content load. + +7) **Wrap Up & Next Steps**: + + - Recap the power of Magic Mode and anti-bot features for handling restricted websites. + - Tease the next video: **Content Cleaning and Fit Markdown** to show how to extract clean and focused content from a page. + +--- + +This outline shows users how to easily avoid bot detection and access restricted content, demonstrating both the power and simplicity of Magic Mode in Crawl4AI. 
\ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_07_Content_Cleaning_and_Fit_Markdown.md b/docs/md_v2/tutorial/episode_07_Content_Cleaning_and_Fit_Markdown.md new file mode 100644 index 0000000..60ef9ee --- /dev/null +++ b/docs/md_v2/tutorial/episode_07_Content_Cleaning_and_Fit_Markdown.md @@ -0,0 +1,89 @@ +# Crawl4AI + +## Episode 7: Content Cleaning and Fit Markdown + +### Quick Intro +Explain content cleaning options, including `fit_markdown` to keep only the most relevant content. Demo: Extract and compare regular vs. fit markdown from a news site or blog. + +Here’s a streamlined outline for the **Content Cleaning and Fit Markdown** video: + +--- + +### **Content Cleaning & Fit Markdown** + +1) **Overview of Content Cleaning in Crawl4AI**: + + - Explain that web pages often include extra elements like ads, navigation bars, footers, and popups. + - Crawl4AI’s content cleaning features help extract only the main content, reducing noise and enhancing readability. + +2) **Basic Content Cleaning Options**: + + - **Removing Unwanted Elements**: Exclude specific HTML tags, like forms or navigation bars: + ```python + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Filter out blocks with fewer than 10 words + excluded_tags=['form', 'nav'], # Exclude specific tags + remove_overlay_elements=True # Remove popups and modals + ) + ``` + - This example extracts content while excluding forms, navigation, and modal overlays, ensuring clean results. + +3) **Fit Markdown for Main Content Extraction**: + + - **What is Fit Markdown**: Uses advanced analysis to identify the most relevant content (ideal for articles, blogs, and documentation). + - **How it Works**: Analyzes content density, removes boilerplate elements, and maintains formatting for a clear output. + - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + main_content = result.fit_markdown # Extracted main content + print(main_content[:500]) # Display first 500 characters + ``` + - Fit Markdown is especially helpful for long-form content like news articles or blog posts. + +4) **Comparing Fit Markdown with Regular Markdown**: + + - **Fit Markdown** returns the primary content without extraneous elements. + - **Regular Markdown** includes all extracted text in markdown format. + - Example to show the difference: + ```python + all_content = result.markdown # Full markdown + main_content = result.fit_markdown # Only the main content + + print(f"All Content Length: {len(all_content)}") + print(f"Main Content Length: {len(main_content)}") + ``` + - This comparison shows the effectiveness of Fit Markdown in focusing on essential content. + +5) **Media and Metadata Handling with Content Cleaning**: + + - **Media Extraction**: Crawl4AI captures images and videos with metadata like alt text, descriptions, and relevance scores: + ```python + for image in result.media["images"]: + print(f"Source: {image['src']}, Alt Text: {image['alt']}, Relevance Score: {image['score']}") + ``` + - **Use Case**: Useful for saving only relevant images or videos from an article or content-heavy page. 
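   - **Filtering sketch** (illustrative; assumes each image entry carries the `score` field shown above):
     ```python
     # Keep only images the crawler scored as highly relevant to the main content
     relevant_images = [img for img in result.media["images"] if img.get("score", 0) > 5]
     print(f"Kept {len(relevant_images)} of {len(result.media['images'])} images")
     ```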
+ +6) **Example of Clean Content Extraction in Action**: + + - Full example extracting cleaned content and Fit Markdown: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, + excluded_tags=['nav', 'footer'], + remove_overlay_elements=True + ) + print(result.fit_markdown[:500]) # Show main content + ``` + - This example demonstrates content cleaning with settings for filtering noise and focusing on the core text. + +7) **Wrap Up & Next Steps**: + + - Summarize the power of Crawl4AI’s content cleaning features and Fit Markdown for capturing clean, relevant content. + - Tease the next video: **Link Analysis and Smart Filtering** to focus on analyzing and filtering links within crawled pages. + +--- + +This outline covers Crawl4AI’s content cleaning features and the unique benefits of Fit Markdown, showing users how to retrieve focused, high-quality content from web pages. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_08_Media_Handling_Images_Videos_and_Audio.md b/docs/md_v2/tutorial/episode_08_Media_Handling_Images_Videos_and_Audio.md new file mode 100644 index 0000000..c0daaca --- /dev/null +++ b/docs/md_v2/tutorial/episode_08_Media_Handling_Images_Videos_and_Audio.md @@ -0,0 +1,116 @@ +# Crawl4AI + +## Episode 8: Media Handling: Images, Videos, and Audio + +### Quick Intro +Showcase Crawl4AI’s media extraction capabilities, including lazy-loaded media and metadata. Demo: Crawl a multimedia page, extract images, and show metadata (alt text, context, relevance score). + +Here’s a clear and focused outline for the **Media Handling: Images, Videos, and Audio** video: + +--- + +### **Media Handling: Images, Videos, and Audio** + +1) **Overview of Media Extraction in Crawl4AI**: + + - Crawl4AI can detect and extract different types of media (images, videos, and audio) along with useful metadata. + - This functionality is essential for gathering visual content from multimedia-heavy pages like e-commerce sites, news articles, and social media feeds. + +2) **Image Extraction and Metadata**: + + - Crawl4AI captures images with detailed metadata, including: + - **Source URL**: The direct URL to the image. + - **Alt Text**: Image description if available. + - **Relevance Score**: A score (0–10) indicating how relevant the image is to the main content. + - **Context**: Text surrounding the image on the page. + - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + + for image in result.media["images"]: + print(f"Source: {image['src']}") + print(f"Alt Text: {image['alt']}") + print(f"Relevance Score: {image['score']}") + print(f"Context: {image['context']}") + ``` + - This example shows how to access each image’s metadata, making it easy to filter for the most relevant visuals. + +3) **Handling Lazy-Loaded Images**: + + - Crawl4AI automatically supports lazy-loaded images, which are commonly used to optimize webpage loading. + - **Example with Wait for Lazy-Loaded Content**: + ```python + result = await crawler.arun( + url="https://example.com", + wait_for="css:img[data-src]", # Wait for lazy-loaded images + delay_before_return_html=2.0 # Allow extra time for images to load + ) + ``` + - This setup waits for lazy-loaded images to appear, ensuring they are fully captured. + +4) **Video Extraction and Metadata**: + + - Crawl4AI captures video elements, including: + - **Source URL**: The video’s direct URL. + - **Type**: Format of the video (e.g., MP4). 
+ - **Thumbnail**: A poster or thumbnail image if available. + - **Duration**: Video length, if metadata is provided. + - **Example**: + ```python + for video in result.media["videos"]: + print(f"Video Source: {video['src']}") + print(f"Type: {video['type']}") + print(f"Thumbnail: {video.get('poster')}") + print(f"Duration: {video.get('duration')}") + ``` + - This allows users to gather video content and relevant details for further processing or analysis. + +5) **Audio Extraction and Metadata**: + + - Audio elements can also be extracted, with metadata like: + - **Source URL**: The audio file’s direct URL. + - **Type**: Format of the audio file (e.g., MP3). + - **Duration**: Length of the audio, if available. + - **Example**: + ```python + for audio in result.media["audios"]: + print(f"Audio Source: {audio['src']}") + print(f"Type: {audio['type']}") + print(f"Duration: {audio.get('duration')}") + ``` + - Useful for sites with podcasts, sound bites, or other audio content. + +6) **Filtering Media by Relevance**: + + - Use metadata like relevance score to filter only the most useful media content: + ```python + relevant_images = [img for img in result.media["images"] if img['score'] > 5] + ``` + - This is especially helpful for content-heavy pages where you only want media directly related to the main content. + +7) **Example: Full Media Extraction with Content Filtering**: + + - Full example extracting images, videos, and audio along with filtering by relevance: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Filter content blocks for relevance + exclude_external_images=True # Only keep internal images + ) + + # Display media summaries + print(f"Relevant Images: {len(relevant_images)}") + print(f"Videos: {len(result.media['videos'])}") + print(f"Audio Clips: {len(result.media['audios'])}") + ``` + - This example shows how to capture and filter various media types, focusing on what’s most relevant. + +8) **Wrap Up & Next Steps**: + + - Recap the comprehensive media extraction capabilities, emphasizing how metadata helps users focus on relevant content. + - Tease the next video: **Link Analysis and Smart Filtering** to explore how Crawl4AI handles internal, external, and social media links for more focused data gathering. + +--- + +This outline provides users with a complete guide to handling images, videos, and audio in Crawl4AI, using metadata to enhance relevance and precision in multimedia extraction. diff --git a/docs/md_v2/tutorial/episode_09_Link_Analysis_and_Smart_Filtering.md b/docs/md_v2/tutorial/episode_09_Link_Analysis_and_Smart_Filtering.md new file mode 100644 index 0000000..263d77b --- /dev/null +++ b/docs/md_v2/tutorial/episode_09_Link_Analysis_and_Smart_Filtering.md @@ -0,0 +1,95 @@ +# Crawl4AI + +## Episode 9: Link Analysis and Smart Filtering + +### Quick Intro +Walk through internal and external link classification, social media link filtering, and custom domain exclusion. Demo: Analyze links on a website, focusing on internal navigation vs. external or ad links. + +Here’s a focused outline for the **Link Analysis and Smart Filtering** video: + +--- + +### **Link Analysis & Smart Filtering** + +1) **Importance of Link Analysis in Web Crawling**: + + - Explain that web pages often contain numerous links, including internal links, external links, social media links, and ads. 
+ - Crawl4AI’s link analysis and filtering options help extract only relevant links, enabling more targeted and efficient crawls. + +2) **Automatic Link Classification**: + + - Crawl4AI categorizes links automatically into internal, external, and social media links. + - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + + # Access internal and external links + internal_links = result.links["internal"] + external_links = result.links["external"] + + # Print first few links for each type + print("Internal Links:", internal_links[:3]) + print("External Links:", external_links[:3]) + ``` + +3) **Filtering Out Unwanted Links**: + + - **Exclude External Links**: Remove all links pointing to external sites. + - **Exclude Social Media Links**: Filter out social media domains like Facebook or Twitter. + - **Example**: + ```python + result = await crawler.arun( + url="https://example.com", + exclude_external_links=True, # Remove external links + exclude_social_media_links=True # Remove social media links + ) + ``` + +4) **Custom Domain Filtering**: + + - **Exclude Specific Domains**: Filter links from particular domains, e.g., ad sites. + - **Custom Social Media Domains**: Add additional social media domains if needed. + - **Example**: + ```python + result = await crawler.arun( + url="https://example.com", + exclude_domains=["ads.com", "trackers.com"], + exclude_social_media_domains=["facebook.com", "linkedin.com"] + ) + ``` + +5) **Accessing Link Context and Metadata**: + + - Crawl4AI provides additional metadata for each link, including its text, type (e.g., navigation or content), and surrounding context. + - **Example**: + ```python + for link in result.links["internal"]: + print(f"Link: {link['href']}, Text: {link['text']}, Context: {link['context']}") + ``` + - **Use Case**: Helps users understand the relevance of links based on where they are placed on the page (e.g., navigation vs. article content). + +6) **Example of Comprehensive Link Filtering and Analysis**: + + - Full example combining link filtering, metadata access, and contextual information: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + exclude_external_links=True, + exclude_social_media_links=True, + exclude_domains=["ads.com"], + css_selector=".main-content" # Focus only on main content area + ) + for link in result.links["internal"]: + print(f"Internal Link: {link['href']}, Text: {link['text']}, Context: {link['context']}") + ``` + - This example filters unnecessary links, keeping only internal and relevant links from the main content area. + +7) **Wrap Up & Next Steps**: + + - Summarize the benefits of link filtering for efficient crawling and relevant content extraction. + - Tease the next video: **Custom Headers, Identity Management, and User Simulation** to explain how to configure identity settings and simulate user behavior for stealthier crawls. + +--- + +This outline provides a practical overview of Crawl4AI’s link analysis and filtering features, helping users target only essential links while eliminating distractions. 
\ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_10_Custom_Headers,_Identity,_and_User_Simulation.md b/docs/md_v2/tutorial/episode_10_Custom_Headers,_Identity,_and_User_Simulation.md new file mode 100644 index 0000000..6eb928f --- /dev/null +++ b/docs/md_v2/tutorial/episode_10_Custom_Headers,_Identity,_and_User_Simulation.md @@ -0,0 +1,93 @@ +# Crawl4AI + +## Episode 10: Custom Headers, Identity, and User Simulation + +### Quick Intro +Teach how to use custom headers, user-agent strings, and simulate real user interactions. Demo: Set custom user-agent and headers to access a site that blocks typical crawlers. + +Here’s a concise outline for the **Custom Headers, Identity Management, and User Simulation** video: + +--- + +### **Custom Headers, Identity Management, & User Simulation** + +1) **Why Customize Headers and Identity in Crawling**: + + - Websites often track request headers and browser properties to detect bots. Customizing headers and managing identity help make requests appear more human, improving access to restricted sites. + +2) **Setting Custom Headers**: + + - Customize HTTP headers to mimic genuine browser requests or meet site-specific requirements: + ```python + headers = { + "Accept-Language": "en-US,en;q=0.9", + "X-Requested-With": "XMLHttpRequest", + "Cache-Control": "no-cache" + } + crawler = AsyncWebCrawler(headers=headers) + ``` + - **Use Case**: Customize the `Accept-Language` header to simulate local user settings, or `Cache-Control` to bypass cache for fresh content. + +3) **Setting a Custom User Agent**: + + - Some websites block requests from common crawler user agents. Setting a custom user agent string helps bypass these restrictions: + ```python + crawler = AsyncWebCrawler( + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + ) + ``` + - **Tip**: Use user-agent strings from popular browsers (e.g., Chrome, Firefox) to improve access and reduce detection risks. + +4) **User Simulation for Human-like Behavior**: + + - Enable `simulate_user=True` to mimic natural user interactions, such as random timing and simulated mouse movements: + ```python + result = await crawler.arun( + url="https://example.com", + simulate_user=True # Simulates human-like behavior + ) + ``` + - **Behavioral Effects**: Adds subtle variations in interactions, making the crawler harder to detect on bot-protected sites. + +5) **Navigator Overrides and Magic Mode for Full Identity Masking**: + + - Use `override_navigator=True` to mask automation indicators like `navigator.webdriver`, which websites check to detect bots: + ```python + result = await crawler.arun( + url="https://example.com", + override_navigator=True # Masks bot-related signals + ) + ``` + - **Combining with Magic Mode**: For a complete anti-bot setup, combine these identity options with `magic=True` for maximum protection: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True, # Enables all anti-bot detection features + user_agent="Custom-Agent", # Custom agent with Magic Mode + ) + ``` + - This setup includes all anti-detection techniques like navigator masking, random timing, and user simulation. 
+ +6) **Example: Comprehensive Setup for Identity Management**: + + - A full example combining custom headers, user-agent, and user simulation for a realistic browsing profile: + ```python + async with AsyncWebCrawler( + headers={"Accept-Language": "en-US", "Cache-Control": "no-cache"}, + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0", + simulate_user=True + ) as crawler: + result = await crawler.arun(url="https://example.com/secure-page") + print(result.markdown[:500]) # Display extracted content + ``` + - This example enables detailed customization for evading detection and accessing protected pages smoothly. + +7) **Wrap Up & Next Steps**: + + - Recap the value of headers, user-agent customization, and simulation in bypassing bot detection. + - Tease the next video: **Extraction Strategies: JSON CSS, LLM, and Cosine** to dive into structured data extraction methods for high-quality content retrieval. + +--- + +This outline equips users with tools for managing crawler identity and human-like behavior, essential for accessing bot-protected or restricted websites. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_11_1_Extraction_Strategies_JSON_CSS.md b/docs/md_v2/tutorial/episode_11_1_Extraction_Strategies_JSON_CSS.md new file mode 100644 index 0000000..b460ff8 --- /dev/null +++ b/docs/md_v2/tutorial/episode_11_1_Extraction_Strategies_JSON_CSS.md @@ -0,0 +1,186 @@ +Here’s a detailed outline for the **JSON-CSS Extraction Strategy** video, covering all key aspects and supported structures in Crawl4AI: + +--- + +### **10.1 JSON-CSS Extraction Strategy** + +#### **1. Introduction to JSON-CSS Extraction** + - JSON-CSS Extraction is used for pulling structured data from pages with repeated patterns, like product listings, article feeds, or directories. + - This strategy allows defining a schema with CSS selectors and data fields, making it easy to capture nested, list-based, or singular elements. + +#### **2. Basic Schema Structure** + - **Schema Fields**: The schema has two main components: + - `baseSelector`: A CSS selector to locate the main elements you want to extract (e.g., each article or product block). + - `fields`: Defines the data fields for each element, supporting various data types and structures. + +#### **3. Simple Field Extraction** + - **Example HTML**: + ```html +
    + <div class="product">
    +   <h2 class="title">Sample Product</h2>
    +   <span class="price">$19.99</span>
    +   <p class="description">This is a sample product.</p>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "price", "selector": ".price", "type": "text"}, + {"name": "description", "selector": ".description", "type": "text"} + ] + } + ``` + - **Explanation**: Each field captures text content from specified CSS selectors within each `.product` element. + +#### **4. Supported Field Types: Text, Attribute, HTML, Regex** + - **Field Type Options**: + - `text`: Extracts visible text. + - `attribute`: Captures an HTML attribute (e.g., `src`, `href`). + - `html`: Extracts the raw HTML of an element. + - `regex`: Allows regex patterns to extract part of the text. + + - **Example HTML** (including an image): + ```html +
    + <div class="product">
    +   <h2 class="title">Sample Product</h2>
    +   <img class="product-image" src="..." alt="Product Image">
    +   <span class="price">$19.99</span>
    +   <p class="description">Limited time offer.</p>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "image_url", "selector": ".product-image", "type": "attribute", "attribute": "src"}, + {"name": "price", "selector": ".price", "type": "regex", "pattern": r"\$(\d+\.\d+)"}, + {"name": "description_html", "selector": ".description", "type": "html"} + ] + } + ``` + - **Explanation**: + - `attribute`: Extracts the `src` attribute from `.product-image`. + - `regex`: Extracts the numeric part from `$19.99`. + - `html`: Retrieves the full HTML of the description element. + +#### **5. Nested Field Extraction** + - **Use Case**: Useful when content contains sub-elements, such as an article with author details within it. + - **Example HTML**: + ```html +
    + <div class="article">
    +   <h1 class="title">Sample Article</h1>
    +   <div class="author">
    +     <span class="name">John Doe</span>
    +     <span class="bio">Writer and editor</span>
    +   </div>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".article", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "author", "type": "nested", "selector": ".author", "fields": [ + {"name": "name", "selector": ".name", "type": "text"}, + {"name": "bio", "selector": ".bio", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: + - `nested`: Extracts `name` and `bio` within `.author`, grouping the author details in a single `author` object. + +#### **6. List and Nested List Extraction** + - **List**: Extracts multiple elements matching the selector as a list. + - **Nested List**: Allows lists within lists, useful for items with sub-lists (e.g., specifications for each product). + - **Example HTML**: + ```html +
    + <div class="product">
    +   <h2 class="title">Product with Features</h2>
    +   <ul class="features">
    +     <li class="feature">Feature 1</li>
    +     <li class="feature">Feature 2</li>
    +     <li class="feature">Feature 3</li>
    +   </ul>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "features", "type": "list", "selector": ".features .feature", "fields": [ + {"name": "feature", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: + - `list`: Captures each `.feature` item within `.features`, outputting an array of features under the `features` field. + +#### **7. Transformations for Field Values** + - Transformations allow you to modify extracted values (e.g., converting to lowercase). + - Supported transformations: `lowercase`, `uppercase`, `strip`. + - **Example HTML**: + ```html +
    + <div class="product">
    +   <h2 class="title">Special Product</h2>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text", "transform": "uppercase"} + ] + } + ``` + - **Explanation**: The `transform` property changes the `title` to uppercase, useful for standardized outputs. + +#### **8. Full JSON-CSS Extraction Example** + - Combining all elements in a single schema example for a comprehensive crawl: + - **Example HTML**: + ```html +
    + <div class="product">
    +   <h2 class="title">Featured Product</h2>
    +   <img class="product-image" src="...">
    +   <span class="price">$99.99</span>
    +   <p class="description">Best product of the year.</p>
    +   <ul class="features">
    +     <li class="feature">Durable</li>
    +     <li class="feature">Eco-friendly</li>
    +   </ul>
    + </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text", "transform": "uppercase"}, + {"name": "image_url", "selector": ".product-image", "type": "attribute", "attribute": "src"}, + {"name": "price", "selector": ".price", "type": "regex", "pattern": r"\$(\d+\.\d+)"}, + {"name": "description", "selector": ".description", "type": "html"}, + {"name": "features", "type": "list", "selector": ".features .feature", "fields": [ + {"name": "feature", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: This schema captures and transforms each aspect of the product, illustrating the JSON-CSS strategy’s versatility for structured extraction. + +#### **9. Wrap Up & Next Steps** + - Summarize JSON-CSS Extraction’s flexibility for structured, pattern-based extraction. + - Tease the next video: **10.2 LLM Extraction Strategy**, focusing on using language models to extract data based on intelligent content analysis. + +--- + +This outline covers each JSON-CSS Extraction option in Crawl4AI, with practical examples and schema configurations, making it a thorough guide for users. diff --git a/docs/md_v2/tutorial/episode_11_2_Extraction_Strategies_LLM.md b/docs/md_v2/tutorial/episode_11_2_Extraction_Strategies_LLM.md new file mode 100644 index 0000000..a9f00e9 --- /dev/null +++ b/docs/md_v2/tutorial/episode_11_2_Extraction_Strategies_LLM.md @@ -0,0 +1,153 @@ +# Crawl4AI + +## Episode 11: Extraction Strategies: JSON CSS, LLM, and Cosine + +### Quick Intro +Introduce JSON CSS Extraction Strategy for structured data, LLM Extraction Strategy for intelligent parsing, and Cosine Strategy for clustering similar content. Demo: Use JSON CSS to scrape product details from an e-commerce site. + +Here’s a comprehensive outline for the **LLM Extraction Strategy** video, covering key details and example applications. + +--- + +### **10.2 LLM Extraction Strategy** + +#### **1. Introduction to LLM Extraction Strategy** + - The LLM Extraction Strategy leverages language models to interpret and extract structured data from complex web content. + - Unlike traditional CSS selectors, this strategy uses natural language instructions and schemas to guide the extraction, ideal for unstructured or diverse content. + - Supports **OpenAI**, **Azure OpenAI**, **HuggingFace**, and **Ollama** models, enabling flexibility with both proprietary and open-source providers. + +#### **2. Key Components of LLM Extraction Strategy** + - **Provider**: Specifies the LLM provider (e.g., OpenAI, HuggingFace, Azure). + - **API Token**: Required for most providers, except Ollama (local LLM model). + - **Instruction**: Custom extraction instructions sent to the model, providing flexibility in how the data is structured and extracted. + - **Schema**: Optional, defines structured fields to organize extracted data into JSON format. + - **Extraction Type**: Supports `"block"` for simpler text blocks or `"schema"` when a structured output format is required. + - **Chunking Parameters**: Breaks down large documents, with options to adjust chunk size and overlap rate for more accurate extraction across lengthy texts. + +#### **3. Basic Extraction Example: OpenAI Model Pricing** + - **Goal**: Extract model names and their input and output fees from the OpenAI pricing page. + - **Schema Definition**: + - **Model Name**: Text for model identification. + - **Input Fee**: Token cost for input processing. + - **Output Fee**: Token cost for output generation. 
+ + - **Schema**: + ```python + class OpenAIModelFee(BaseModel): + model_name: str = Field(..., description="Name of the OpenAI model.") + input_fee: str = Field(..., description="Fee for input token for the OpenAI model.") + output_fee: str = Field(..., description="Fee for output token for the OpenAI model.") + ``` + + - **Example Code**: + ```python + async def extract_openai_pricing(): + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://openai.com/api/pricing/", + extraction_strategy=LLMExtractionStrategy( + provider="openai/gpt-4o", + api_token=os.getenv("OPENAI_API_KEY"), + schema=OpenAIModelFee.schema(), + extraction_type="schema", + instruction="Extract model names and fees for input and output tokens from the page." + ), + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - The extraction strategy combines a schema and detailed instruction to guide the LLM in capturing structured data. + - Each model’s name, input fee, and output fee are extracted in a JSON format. + +#### **4. Knowledge Graph Extraction Example** + - **Goal**: Extract entities and their relationships from a document for use in a knowledge graph. + - **Schema Definition**: + - **Entities**: Individual items with descriptions (e.g., people, organizations). + - **Relationships**: Connections between entities, including descriptions and relationship types. + + - **Schema**: + ```python + class Entity(BaseModel): + name: str + description: str + + class Relationship(BaseModel): + entity1: Entity + entity2: Entity + description: str + relation_type: str + + class KnowledgeGraph(BaseModel): + entities: List[Entity] + relationships: List[Relationship] + ``` + + - **Example Code**: + ```python + async def extract_knowledge_graph(): + extraction_strategy = LLMExtractionStrategy( + provider="azure/gpt-4o-mini", + api_token=os.getenv("AZURE_API_KEY"), + schema=KnowledgeGraph.schema(), + extraction_type="schema", + instruction="Extract entities and relationships from the content to build a knowledge graph." + ) + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com/some-article", + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - In this setup, the LLM extracts entities and their relationships based on the schema and instruction. + - The schema organizes results into a JSON-based knowledge graph format. + +#### **5. Key Settings in LLM Extraction** + - **Chunking Options**: + - For long pages, set `chunk_token_threshold` to specify maximum token count per section. + - Adjust `overlap_rate` to control the overlap between chunks, useful for contextual consistency. + - **Example**: + ```python + extraction_strategy = LLMExtractionStrategy( + provider="openai/gpt-4", + api_token=os.getenv("OPENAI_API_KEY"), + chunk_token_threshold=3000, + overlap_rate=0.2, # 20% overlap between chunks + instruction="Extract key insights and relationships." + ) + ``` + - This setup ensures that longer texts are divided into manageable chunks with slight overlap, enhancing the quality of extraction. + +#### **6. Flexible Provider Options for LLM Extraction** + - **Using Proprietary Models**: OpenAI, Azure, and HuggingFace provide robust language models, often suited for complex or detailed extractions. 
+ - **Using Open-Source Models**: Ollama and other open-source models can be deployed locally, suitable for offline or cost-effective extraction. + - **Example Call**: + ```python + await extract_structured_data_using_llm("huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct", os.getenv("HUGGINGFACE_API_KEY")) + await extract_structured_data_using_llm("openai/gpt-4o", os.getenv("OPENAI_API_KEY")) + await extract_structured_data_using_llm("ollama/llama3.2") + ``` + +#### **7. Complete Example of LLM Extraction Setup** + - Code to run both the OpenAI pricing and Knowledge Graph extractions, using various providers: + ```python + async def main(): + await extract_openai_pricing() + await extract_knowledge_graph() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **8. Wrap Up & Next Steps** + - Recap the power of LLM extraction for handling unstructured or complex data extraction tasks. + - Tease the next video: **10.3 Cosine Similarity Strategy** for clustering similar content based on semantic similarity. + +--- + +This outline explains LLM Extraction in Crawl4AI, with examples showing how to extract structured data using custom schemas and instructions. It demonstrates flexibility with multiple providers, ensuring practical application for different use cases. diff --git a/docs/md_v2/tutorial/episode_11_3_Extraction_Strategies_Cosine.md b/docs/md_v2/tutorial/episode_11_3_Extraction_Strategies_Cosine.md new file mode 100644 index 0000000..6100ae4 --- /dev/null +++ b/docs/md_v2/tutorial/episode_11_3_Extraction_Strategies_Cosine.md @@ -0,0 +1,136 @@ +# Crawl4AI + +## Episode 11: Extraction Strategies: JSON CSS, LLM, and Cosine + +### Quick Intro +Introduce JSON CSS Extraction Strategy for structured data, LLM Extraction Strategy for intelligent parsing, and Cosine Strategy for clustering similar content. Demo: Use JSON CSS to scrape product details from an e-commerce site. + +Here’s a structured outline for the **Cosine Similarity Strategy** video, covering key concepts, configuration, and a practical example. + +--- + +### **10.3 Cosine Similarity Strategy** + +#### **1. Introduction to Cosine Similarity Strategy** + - The Cosine Similarity Strategy clusters content by semantic similarity, offering an efficient alternative to LLM-based extraction, especially when speed is a priority. + - Ideal for grouping similar sections of text, this strategy is well-suited for pages with content sections that may need to be classified or tagged, like news articles, product descriptions, or reviews. + +#### **2. Key Configuration Options** + - **semantic_filter**: A keyword-based filter to focus on relevant content. + - **word_count_threshold**: Minimum number of words per cluster, filtering out shorter, less meaningful clusters. + - **max_dist**: Maximum allowable distance between elements in clusters, impacting cluster tightness. + - **linkage_method**: Method for hierarchical clustering, such as `'ward'` (for well-separated clusters). + - **top_k**: Specifies the number of top categories for each cluster. + - **model_name**: Defines the model for embeddings, such as `sentence-transformers/all-MiniLM-L6-v2`. + - **sim_threshold**: Minimum similarity threshold for filtering, allowing control over cluster relevance. + +#### **3. How Cosine Similarity Clustering Works** + - **Step 1**: Embeddings are generated for each text section, transforming them into vectors that capture semantic meaning. 
+ - **Step 2**: Hierarchical clustering groups similar sections based on cosine similarity, forming clusters with related content. + - **Step 3**: Clusters are filtered based on word count, removing those below the `word_count_threshold`. + - **Step 4**: Each cluster is then categorized with tags, if enabled, providing context to each grouped content section. + +#### **4. Example Use Case: Clustering Blog Article Sections** + - **Goal**: Group related sections of a blog or news page to identify distinct topics or discussion areas. + - **Example HTML Sections**: + ```text + "The economy is showing signs of recovery, with markets up this quarter.", + "In the sports world, several major teams are preparing for the upcoming season.", + "New advancements in AI technology are reshaping the tech landscape.", + "Market analysts are optimistic about continued growth in tech stocks." + ``` + + - **Code Setup**: + ```python + async def extract_blog_sections(): + extraction_strategy = CosineStrategy( + word_count_threshold=15, + max_dist=0.3, + sim_threshold=0.2, + model_name="sentence-transformers/all-MiniLM-L6-v2", + top_k=2 + ) + async with AsyncWebCrawler() as crawler: + url = "https://example.com/blog-page" + result = await crawler.arun( + url=url, + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - **word_count_threshold**: Ensures only clusters with meaningful content are included. + - **sim_threshold**: Filters out clusters with low similarity, focusing on closely related sections. + - **top_k**: Selects top tags, useful for identifying main topics. + +#### **5. Applying Semantic Filtering with Cosine Similarity** + - **Semantic Filter**: Filters sections based on relevance to a specific keyword, such as “technology” for tech articles. + - **Example Code**: + ```python + extraction_strategy = CosineStrategy( + semantic_filter="technology", + word_count_threshold=10, + max_dist=0.25, + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + ``` + - **Explanation**: + - **semantic_filter**: Only sections with high similarity to the “technology” keyword will be included in the clustering, making it easy to focus on specific topics within a mixed-content page. + +#### **6. Clustering Product Reviews by Similarity** + - **Goal**: Organize product reviews by themes, such as “price,” “quality,” or “durability.” + - **Example Reviews**: + ```text + "The quality of this product is outstanding and well worth the price.", + "I found the product to be durable but a bit overpriced.", + "Great value for the money and long-lasting.", + "The build quality is good, but I expected a lower price point." + ``` + + - **Code Setup**: + ```python + async def extract_product_reviews(): + extraction_strategy = CosineStrategy( + word_count_threshold=20, + max_dist=0.35, + sim_threshold=0.25, + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + async with AsyncWebCrawler() as crawler: + url = "https://example.com/product-reviews" + result = await crawler.arun( + url=url, + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - This configuration clusters similar reviews, grouping feedback by common themes, helping businesses understand customer sentiments around particular product aspects. + +#### **7. 
Performance Advantages of Cosine Strategy** + - **Speed**: The Cosine Similarity Strategy is faster than LLM-based extraction, as it doesn’t rely on API calls to external LLMs. + - **Local Processing**: The strategy runs locally with pre-trained sentence embeddings, ideal for high-throughput scenarios where cost and latency are concerns. + - **Comparison**: With a well-optimized local model, this method can perform clustering on large datasets quickly, making it suitable for tasks requiring rapid, repeated analysis. + +#### **8. Full Code Example for Clustering News Articles** + - **Code**: + ```python + async def main(): + await extract_blog_sections() + await extract_product_reviews() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **9. Wrap Up & Next Steps** + - Recap the efficiency and effectiveness of Cosine Similarity for clustering related content quickly. + - Close with a reminder of Crawl4AI’s flexibility across extraction strategies, and prompt users to experiment with different settings to optimize clustering for their specific content. + +--- + +This outline covers Cosine Similarity Strategy’s speed and effectiveness, providing examples that showcase its potential for clustering various content types efficiently. diff --git a/docs/md_v2/tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites.md b/docs/md_v2/tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites.md new file mode 100644 index 0000000..d1ab813 --- /dev/null +++ b/docs/md_v2/tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites.md @@ -0,0 +1,140 @@ +# Crawl4AI + +## Episode 12: Session-Based Crawling for Dynamic Websites + +### Quick Intro +Show session management for handling websites with multiple pages or actions (like “load more” buttons). Demo: Crawl a paginated content page, persisting session data across multiple requests. + +Here’s a detailed outline for the **Session-Based Crawling for Dynamic Websites** video, explaining why sessions are necessary, how to use them, and providing practical examples and a visual diagram to illustrate the concept. + +--- + +### **11. Session-Based Crawling for Dynamic Websites** + +#### **1. Introduction to Session-Based Crawling** + - **What is Session-Based Crawling**: Session-based crawling maintains a continuous browsing session across multiple page states, allowing the crawler to interact with a page and retrieve content that loads dynamically or based on user interactions. + - **Why It’s Needed**: + - In static pages, all content is available directly from a single URL. + - In dynamic websites, content often loads progressively or based on user actions (e.g., clicking “load more,” submitting forms, scrolling). + - Session-based crawling helps simulate user actions, capturing content that is otherwise hidden until specific actions are taken. + +#### **2. Conceptual Diagram for Session-Based Crawling** + + ```mermaid + graph TD + Start[Start Session] --> S1[Initial State (S1)] + S1 -->|Crawl| Content1[Extract Content S1] + S1 -->|Action: Click Load More| S2[State S2] + S2 -->|Crawl| Content2[Extract Content S2] + S2 -->|Action: Scroll Down| S3[State S3] + S3 -->|Crawl| Content3[Extract Content S3] + S3 -->|Action: Submit Form| S4[Final State] + S4 -->|Crawl| Content4[Extract Content S4] + Content4 --> End[End Session] + ``` + + - **Explanation of Diagram**: + - **Start**: Initializes the session and opens the starting URL. 
+ - **State Transitions**: Each action (e.g., clicking “load more,” scrolling) transitions to a new state, where additional content becomes available. + - **Session Persistence**: Keeps the same browsing session active, preserving the state and allowing for a sequence of actions to unfold. + - **End**: After reaching the final state, the session ends, and all accumulated content has been extracted. + +#### **3. Key Components of Session-Based Crawling in Crawl4AI** + - **Session ID**: A unique identifier to maintain the state across requests, allowing the crawler to “remember” previous actions. + - **JavaScript Execution**: Executes JavaScript commands (e.g., clicks, scrolls) to simulate interactions. + - **Wait Conditions**: Ensures the crawler waits for content to load in each state before moving on. + - **Sequential State Transitions**: By defining actions and wait conditions between states, the crawler can navigate through the page as a user would. + +#### **4. Basic Session Example: Multi-Step Content Loading** + - **Goal**: Crawl an article feed that requires several “load more” clicks to display additional content. + - **Code**: + ```python + async def crawl_article_feed(): + async with AsyncWebCrawler() as crawler: + session_id = "feed_session" + + for page in range(3): + result = await crawler.arun( + url="https://example.com/articles", + session_id=session_id, + js_code="document.querySelector('.load-more-button').click();" if page > 0 else None, + wait_for="css:.article", + css_selector=".article" # Target article elements + ) + print(f"Page {page + 1}: Extracted {len(result.extracted_content)} articles") + ``` + - **Explanation**: + - **session_id**: Ensures all requests share the same browsing state. + - **js_code**: Clicks the “load more” button after the initial page load, expanding content on each iteration. + - **wait_for**: Ensures articles have loaded after each click before extraction. + +#### **5. Advanced Example: E-Commerce Product Search with Filter Selection** + - **Goal**: Interact with filters on an e-commerce page to extract products based on selected criteria. + - **Example Steps**: + 1. **State 1**: Load the main product page. + 2. **State 2**: Apply a filter (e.g., “On Sale”) by selecting a checkbox. + 3. **State 3**: Scroll to load additional products and capture updated results. + + - **Code**: + ```python + async def extract_filtered_products(): + async with AsyncWebCrawler() as crawler: + session_id = "product_session" + + # Step 1: Open product page + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + wait_for="css:.product-item" + ) + + # Step 2: Apply filter (e.g., "On Sale") + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + js_code="document.querySelector('#sale-filter-checkbox').click();", + wait_for="css:.product-item" + ) + + # Step 3: Scroll to load additional products + for _ in range(2): # Scroll down twice + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.product-item" + ) + print(f"Loaded {len(result.extracted_content)} products after scroll") + ``` + - **Explanation**: + - **State Persistence**: Each action (filter selection and scroll) builds on the previous session state. + - **Multiple Interactions**: Combines clicking a filter with scrolling, demonstrating how the session preserves these actions. + +#### **6. 
Key Benefits of Session-Based Crawling** + - **Accessing Hidden Content**: Retrieves data that loads only after user actions. + - **Simulating User Behavior**: Handles interactive elements such as “load more” buttons, dropdowns, and filters. + - **Maintaining Continuity Across States**: Enables a sequential process, moving logically from one state to the next, capturing all desired content without reloading the initial state each time. + +#### **7. Additional Configuration Tips** + - **Manage Session End**: Always conclude the session after the final state to release resources. + - **Optimize with Wait Conditions**: Use `wait_for` to ensure complete loading before each extraction. + - **Handling Errors in Session-Based Crawling**: Include error handling for interactions that may fail, ensuring robustness across state transitions. + +#### **8. Complete Code Example: Multi-Step Session Workflow** + - **Example**: + ```python + async def main(): + await crawl_article_feed() + await extract_filtered_products() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **9. Wrap Up & Next Steps** + - Recap the usefulness of session-based crawling for dynamic content extraction. + - Tease the next video: **Hooks and Custom Workflow with AsyncWebCrawler** to cover advanced customization options for further control over the crawling process. + +--- + +This outline covers session-based crawling from both a conceptual and practical perspective, helping users understand its importance, configure it effectively, and use it to handle complex dynamic content. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_13_Chunking_Strategies_for_Large_Text_Processing.md b/docs/md_v2/tutorial/episode_13_Chunking_Strategies_for_Large_Text_Processing.md new file mode 100644 index 0000000..eda07e8 --- /dev/null +++ b/docs/md_v2/tutorial/episode_13_Chunking_Strategies_for_Large_Text_Processing.md @@ -0,0 +1,138 @@ +# Crawl4AI + +## Episode 13: Chunking Strategies for Large Text Processing + +### Quick Intro +Explain Regex, NLP, and Fixed-Length chunking, and when to use each. Demo: Chunk a large article or document for processing by topics or sentences. + +Here’s a structured outline for the **Chunking Strategies for Large Text Processing** video, emphasizing how chunking works within extraction and why it’s crucial for effective data aggregation. + +Here’s a structured outline for the **Chunking Strategies for Large Text Processing** video, explaining each strategy, when to use it, and providing examples to illustrate. + +--- + +### **12. Chunking Strategies for Large Text Processing** + +#### **1. Introduction to Chunking in Crawl4AI** + - **What is Chunking**: Chunking is the process of dividing large text into manageable sections or “chunks,” enabling efficient processing in extraction tasks. + - **Why It’s Needed**: + - When processing large text, feeding it directly into an extraction function (like `F(x)`) can overwhelm memory or token limits. + - Chunking breaks down `x` (the text) into smaller pieces, which are processed sequentially or in parallel by the extraction function, with the final result being an aggregation of all chunks’ processed output. + +#### **2. Key Chunking Strategies and Use Cases** + - Crawl4AI offers various chunking strategies to suit different text structures, chunk sizes, and processing requirements. 
+ - **Choosing a Strategy**: Select based on the type of text (e.g., articles, transcripts) and extraction needs (e.g., simple splitting or context-sensitive processing). + +#### **3. Strategy 1: Regex-Based Chunking** + - **Description**: Uses regular expressions to split text based on specified patterns (e.g., paragraphs or section breaks). + - **Use Case**: Ideal for dividing text by paragraphs or larger logical blocks where sections are clearly separated by line breaks or punctuation. + - **Example**: + - **Pattern**: `r'\n\n'` for double line breaks. + ```python + chunker = RegexChunking(patterns=[r'\n\n']) + text_chunks = chunker.chunk(long_text) + print(text_chunks) # Output: List of paragraphs + ``` + - **Pros**: Flexible for pattern-based chunking. + - **Cons**: Limited to text with consistent formatting. + +#### **4. Strategy 2: NLP Sentence-Based Chunking** + - **Description**: Uses NLP to split text by sentences, ensuring grammatically complete segments. + - **Use Case**: Useful for extracting individual statements, such as in news articles, quotes, or legal text. + - **Example**: + ```python + chunker = NlpSentenceChunking() + sentence_chunks = chunker.chunk(long_text) + print(sentence_chunks) # Output: List of sentences + ``` + - **Pros**: Maintains sentence structure, ideal for tasks needing semantic completeness. + - **Cons**: May create very small chunks, which could limit contextual extraction. + +#### **5. Strategy 3: Topic-Based Segmentation Using TextTiling** + - **Description**: Segments text into topics using TextTiling, identifying topic shifts and key segments. + - **Use Case**: Ideal for long articles, reports, or essays where each section covers a different topic. + - **Example**: + ```python + chunker = TopicSegmentationChunking(num_keywords=3) + topic_chunks = chunker.chunk_with_topics(long_text) + print(topic_chunks) # Output: List of topic segments with keywords + ``` + - **Pros**: Groups related content, preserving topical coherence. + - **Cons**: Depends on identifiable topic shifts, which may not be present in all texts. + +#### **6. Strategy 4: Fixed-Length Word Chunking** + - **Description**: Splits text into chunks based on a fixed number of words. + - **Use Case**: Ideal for text where exact segment size is required, such as processing word-limited documents for LLMs. + - **Example**: + ```python + chunker = FixedLengthWordChunking(chunk_size=100) + word_chunks = chunker.chunk(long_text) + print(word_chunks) # Output: List of 100-word chunks + ``` + - **Pros**: Ensures uniform chunk sizes, suitable for token-based extraction limits. + - **Cons**: May split sentences, affecting semantic coherence. + +#### **7. Strategy 5: Sliding Window Chunking** + - **Description**: Uses a fixed window size with a step, creating overlapping chunks to maintain context. + - **Use Case**: Useful for maintaining context across sections, as with documents where context is needed for neighboring sections. + - **Example**: + ```python + chunker = SlidingWindowChunking(window_size=100, step=50) + window_chunks = chunker.chunk(long_text) + print(window_chunks) # Output: List of overlapping word chunks + ``` + - **Pros**: Retains context across adjacent chunks, ideal for complex semantic extraction. + - **Cons**: Overlap increases data size, potentially impacting processing time. + +#### **8. Strategy 6: Overlapping Window Chunking** + - **Description**: Similar to sliding windows but with a defined overlap, allowing chunks to share content at the edges. 
+ - **Use Case**: Suitable for handling long texts with essential overlapping information, like research articles or medical records. + - **Example**: + ```python + chunker = OverlappingWindowChunking(window_size=1000, overlap=100) + overlap_chunks = chunker.chunk(long_text) + print(overlap_chunks) # Output: List of overlapping chunks with defined overlap + ``` + - **Pros**: Allows controlled overlap for consistent content coverage across chunks. + - **Cons**: Redundant data in overlapping areas may increase computation. + +#### **9. Practical Example: Using Chunking with an Extraction Strategy** + - **Goal**: Combine chunking with an extraction strategy to process large text effectively. + - **Example Code**: + ```python + from crawl4ai.extraction_strategy import LLMExtractionStrategy + + async def extract_large_text(): + # Initialize chunker and extraction strategy + chunker = FixedLengthWordChunking(chunk_size=200) + extraction_strategy = LLMExtractionStrategy(provider="openai/gpt-4", api_token="your_api_token") + + # Split text into chunks + text_chunks = chunker.chunk(large_text) + + async with AsyncWebCrawler() as crawler: + for chunk in text_chunks: + result = await crawler.arun( + url="https://example.com", + extraction_strategy=extraction_strategy, + content=chunk + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - `chunker.chunk()`: Divides the `large_text` into smaller segments based on the chosen strategy. + - `extraction_strategy`: Processes each chunk separately, and results are then aggregated to form the final output. + +#### **10. Choosing the Right Chunking Strategy** + - **Text Structure**: If text has clear sections (e.g., paragraphs, topics), use Regex or Topic Segmentation. + - **Extraction Needs**: If context is crucial, consider Sliding or Overlapping Window Chunking. + - **Processing Constraints**: For word-limited extractions (e.g., LLMs with token limits), Fixed-Length Word Chunking is often most effective. + +#### **11. Wrap Up & Next Steps** + - Recap the benefits of each chunking strategy and when to use them in extraction workflows. + - Tease the next video: **Hooks and Custom Workflow with AsyncWebCrawler**, focusing on customizing crawler behavior with hooks for a fine-tuned extraction process. + +--- + +This outline provides a complete understanding of chunking strategies, explaining each method’s strengths and best-use scenarios to help users process large texts effectively in Crawl4AI. \ No newline at end of file diff --git a/docs/md_v2/tutorial/episode_14_Hooks_and_Custom_Workflow_with_AsyncWebCrawler.md b/docs/md_v2/tutorial/episode_14_Hooks_and_Custom_Workflow_with_AsyncWebCrawler.md new file mode 100644 index 0000000..87a3d21 --- /dev/null +++ b/docs/md_v2/tutorial/episode_14_Hooks_and_Custom_Workflow_with_AsyncWebCrawler.md @@ -0,0 +1,185 @@ +# Crawl4AI + +## Episode 14: Hooks and Custom Workflow with AsyncWebCrawler + +### Quick Intro +Cover hooks (`on_browser_created`, `before_goto`, `after_goto`) to add custom workflows. Demo: Use hooks to add custom cookies or headers, log HTML, or trigger specific events on page load. + +Here’s a detailed outline for the **Hooks and Custom Workflow with AsyncWebCrawler** video, covering each hook’s purpose, usage, and example implementations. + +--- + +### **13. Hooks and Custom Workflow with AsyncWebCrawler** + +#### **1. 
Introduction to Hooks in Crawl4AI** + - **What are Hooks**: Hooks are customizable entry points in the crawling process that allow users to inject custom actions or logic at specific stages. + - **Why Use Hooks**: + - They enable fine-grained control over the crawling workflow. + - Useful for performing additional tasks (e.g., logging, modifying headers) dynamically during the crawl. + - Hooks provide the flexibility to adapt the crawler to complex site structures or unique project needs. + +#### **2. Overview of Available Hooks** + - Crawl4AI offers seven key hooks to modify and control different stages in the crawling lifecycle: + - `on_browser_created` + - `on_user_agent_updated` + - `on_execution_started` + - `before_goto` + - `after_goto` + - `before_return_html` + - `before_retrieve_html` + +#### **3. Hook-by-Hook Explanation and Examples** + +--- + +##### **Hook 1: `on_browser_created`** + - **Purpose**: Triggered right after the browser instance is created. + - **Use Case**: + - Initializing browser-specific settings or performing setup actions. + - Configuring browser extensions or scripts before any page is opened. + - **Example**: + ```python + async def log_browser_creation(browser): + print("Browser instance created:", browser) + + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + ``` + - **Explanation**: This hook logs the browser creation event, useful for tracking when a new browser instance starts. + +--- + +##### **Hook 2: `on_user_agent_updated`** + - **Purpose**: Called whenever the user agent string is updated. + - **Use Case**: + - Modifying the user agent based on page requirements, e.g., changing to a mobile user agent for mobile-only pages. + - **Example**: + ```python + def update_user_agent(user_agent): + print(f"User Agent Updated: {user_agent}") + + crawler.crawler_strategy.set_hook('on_user_agent_updated', update_user_agent) + crawler.update_user_agent("Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X)") + ``` + - **Explanation**: This hook provides a callback every time the user agent changes, helpful for debugging or dynamically altering user agent settings based on conditions. + +--- + +##### **Hook 3: `on_execution_started`** + - **Purpose**: Called right before the crawler begins any interaction (e.g., JavaScript execution, clicks). + - **Use Case**: + - Performing setup actions, such as inserting cookies or initiating custom scripts. + - **Example**: + ```python + async def log_execution_start(page): + print("Execution started on page:", page.url) + + crawler.crawler_strategy.set_hook('on_execution_started', log_execution_start) + ``` + - **Explanation**: Logs the start of any major interaction on the page, ideal for cases where you want to monitor each interaction. + +--- + +##### **Hook 4: `before_goto`** + - **Purpose**: Triggered before navigating to a new URL with `page.goto()`. + - **Use Case**: + - Modifying request headers or setting up conditions right before the page loads. + - Adding headers or dynamically adjusting options for specific URLs. + - **Example**: + ```python + async def modify_headers_before_goto(page): + await page.set_extra_http_headers({"X-Custom-Header": "CustomValue"}) + print("Custom headers set before navigation") + + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + ``` + - **Explanation**: This hook allows injecting headers or altering settings based on the page’s needs, particularly useful for pages with custom requirements. 
+ +--- + +##### **Hook 5: `after_goto`** + - **Purpose**: Executed immediately after a page has loaded (after `page.goto()`). + - **Use Case**: + - Checking the loaded page state, modifying the DOM, or performing post-navigation actions (e.g., scrolling). + - **Example**: + ```python + async def post_navigation_scroll(page): + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + print("Scrolled to the bottom after navigation") + + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + ``` + - **Explanation**: This hook scrolls to the bottom of the page after loading, which can help load dynamically added content like infinite scroll elements. + +--- + +##### **Hook 6: `before_return_html`** + - **Purpose**: Called right before HTML content is retrieved and returned. + - **Use Case**: + - Removing overlays or cleaning up the page for a cleaner HTML extraction. + - **Example**: + ```python + async def remove_advertisements(page, html): + await page.evaluate("document.querySelectorAll('.ad-banner').forEach(el => el.remove());") + print("Advertisements removed before returning HTML") + + crawler.crawler_strategy.set_hook('before_return_html', remove_advertisements) + ``` + - **Explanation**: The hook removes ad banners from the HTML before it’s retrieved, ensuring a cleaner data extraction. + +--- + +##### **Hook 7: `before_retrieve_html`** + - **Purpose**: Runs right before Crawl4AI initiates HTML retrieval. + - **Use Case**: + - Finalizing any page adjustments (e.g., setting timers, waiting for specific elements). + - **Example**: + ```python + async def wait_for_content_before_retrieve(page): + await page.wait_for_selector('.main-content') + print("Main content loaded, ready to retrieve HTML") + + crawler.crawler_strategy.set_hook('before_retrieve_html', wait_for_content_before_retrieve) + ``` + - **Explanation**: This hook waits for the main content to load before retrieving the HTML, ensuring that all essential content is captured. + +#### **4. Setting Hooks in Crawl4AI** + - **How to Set Hooks**: + - Use `set_hook` to define a custom function for each hook. + - Each hook function can be asynchronous (useful for actions like waiting or retrieving async data). + - **Example Setup**: + ```python + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + ``` + +#### **5. Complete Example: Using Hooks for a Customized Crawl Workflow** + - **Goal**: Log each key step, set custom headers before navigation, and clean up the page before retrieving HTML. + - **Example Code**: + ```python + async def custom_crawl(): + async with AsyncWebCrawler() as crawler: + # Set hooks for custom workflow + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + crawler.crawler_strategy.set_hook('before_return_html', remove_advertisements) + + # Perform the crawl + url = "https://example.com" + result = await crawler.arun(url=url) + print(result.html) # Display or process HTML + ``` + +#### **6. Benefits of Using Hooks in Custom Crawling Workflows** + - **Enhanced Control**: Hooks offer precise control over each stage, allowing adjustments based on content and structure. 
+ - **Efficient Modifications**: Avoid reloading or restarting the session; hooks can alter actions dynamically. + - **Context-Sensitive Actions**: Hooks enable custom logic tailored to specific pages or sections, maximizing extraction quality. + +#### **7. Wrap Up & Next Steps** + - Recap how hooks empower customized workflows in Crawl4AI, enabling flexibility at every stage. + - Tease the next video: **Automating Post-Processing with Crawl4AI**, covering automated steps after data extraction. + +--- + +This outline provides a thorough understanding of hooks, their practical applications, and examples for customizing the crawling workflow in Crawl4AI. \ No newline at end of file diff --git a/docs/md_v2/tutorial/tutorial.md b/docs/md_v2/tutorial/tutorial.md new file mode 100644 index 0000000..7bead84 --- /dev/null +++ b/docs/md_v2/tutorial/tutorial.md @@ -0,0 +1,1789 @@ +# Crawl4AI + +## Episode 1: Introduction to Crawl4AI and Basic Installation + +### Quick Intro +Walk through installation from PyPI, setup, and verification. Show how to install with options like `torch` or `transformer` for advanced capabilities. + +Here's a condensed outline of the **Installation and Setup** video content: + +--- + +1) **Introduction to Crawl4AI**: + + - Briefly explain that Crawl4AI is a powerful tool for web scraping, data extraction, and content processing, with customizable options for various needs. + +2) **Installation Overview**: + + - **Basic Install**: Run `pip install crawl4ai` and `playwright install` (to set up browser dependencies). + - **Optional Advanced Installs**: + - `pip install crawl4ai[torch]` - Adds PyTorch for clustering. + - `pip install crawl4ai[transformer]` - Adds support for LLM-based extraction. + - `pip install crawl4ai[all]` - Installs all features for complete functionality. + +3) **Verifying the Installation**: + + - Walk through a simple test script to confirm the setup: + ```python + import asyncio + from crawl4ai import AsyncWebCrawler, CacheMode + + async def main(): + async with AsyncWebCrawler(verbose=True) as crawler: + result = await crawler.arun(url="https://www.example.com") + print(result.markdown[:500]) # Show first 500 characters + + asyncio.run(main()) + ``` + - Explain that this script initializes the crawler and runs it on a test URL, displaying part of the extracted content to verify functionality. + +4) **Important Tips**: + + - **Run** `playwright install` **after installation** to set up dependencies. + - **For full performance** on text-related tasks, run `crawl4ai-download-models` after installing with `[torch]`, `[transformer]`, or `[all]` options. + - If you encounter issues, refer to the documentation or GitHub issues. + +5) **Wrap Up**: + + - Introduce the next topic in the series, which will cover Crawl4AI's browser configuration options (like choosing between `chromium`, `firefox`, and `webkit`). + +--- + +This structure provides a concise, effective guide to get viewers up and running with Crawl4AI in minutes.# Crawl4AI + +## Episode 2: Overview of Advanced Features + +### Quick Intro +A general overview of advanced features like hooks, CSS selectors, and JSON CSS extraction. 
+ +Here's a condensed outline for an **Overview of Advanced Features** video covering Crawl4AI's powerful customization and extraction options: + +--- + +### **Overview of Advanced Features** + +1) **Introduction to Advanced Features**: + + - Briefly introduce Crawl4AI’s advanced tools, which let users go beyond basic crawling to customize and fine-tune their scraping workflows. + +2) **Taking Screenshots**: + + - Explain the screenshot capability for capturing page state and verifying content. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", screenshot=True) + ``` + - Mention that screenshots are saved as a base64 string in `result`, allowing easy decoding and saving. + +3) **Media and Link Extraction**: + + - Demonstrate how to pull all media (images, videos) and links (internal and external) from a page for deeper analysis or content gathering. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com") + print("Media:", result.media) + print("Links:", result.links) + ``` + +4) **Custom User Agent**: + + - Show how to set a custom user agent to disguise the crawler or simulate specific devices/browsers. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", user_agent="Mozilla/5.0 (compatible; MyCrawler/1.0)") + ``` + +5) **Custom Hooks for Enhanced Control**: + + - Briefly cover how to use hooks, which allow custom actions like setting headers or handling login during the crawl. + - **Example**: Setting a custom header with `before_get_url` hook. + ```python + async def before_get_url(page): + await page.set_extra_http_headers({"X-Test-Header": "test"}) + ``` + +6) **CSS Selectors for Targeted Extraction**: + + - Explain the use of CSS selectors to extract specific elements, ideal for structured data like articles or product details. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", css_selector="h2") + print("H2 Tags:", result.extracted_content) + ``` + +7) **Crawling Inside Iframes**: + + - Mention how enabling `process_iframes=True` allows extracting content within iframes, useful for sites with embedded content or ads. + - **Example**: + ```python + result = await crawler.arun(url="https://www.example.com", process_iframes=True) + ``` + +8) **Wrap-Up**: + + - Summarize these advanced features and how they allow users to customize every part of their web scraping experience. + - Tease upcoming videos where each feature will be explored in detail. + +--- + +This covers each advanced feature with a brief example, providing a useful overview to prepare viewers for the more in-depth videos.# Crawl4AI + +## Episode 3: Browser Configurations & Headless Crawling + +### Quick Intro +Explain browser options (`chromium`, `firefox`, `webkit`) and settings for headless mode, caching, and verbose logging. + +Here’s a streamlined outline for the **Browser Configurations & Headless Crawling** video: + +--- + +### **Browser Configurations & Headless Crawling** + +1) **Overview of Browser Options**: + + - Crawl4AI supports three browser engines: + - **Chromium** (default) - Highly compatible. + - **Firefox** - Great for specialized use cases. + - **Webkit** - Lightweight, ideal for basic needs. 
+ - **Example**: + ```python + # Using Chromium (default) + crawler = AsyncWebCrawler(browser_type="chromium") + + # Using Firefox + crawler = AsyncWebCrawler(browser_type="firefox") + + # Using WebKit + crawler = AsyncWebCrawler(browser_type="webkit") + ``` + +2) **Headless Mode**: + + - Headless mode runs the browser without a visible GUI, making it faster and less resource-intensive. + - To enable or disable: + ```python + # Headless mode (default is True) + crawler = AsyncWebCrawler(headless=True) + + # Disable headless mode for debugging + crawler = AsyncWebCrawler(headless=False) + ``` + +3) **Verbose Logging**: + + - Use `verbose=True` to get detailed logs for each action, useful for debugging: + ```python + crawler = AsyncWebCrawler(verbose=True) + ``` + +4) **Running a Basic Crawl with Configuration**: + + - Example of a simple crawl with custom browser settings: + ```python + async with AsyncWebCrawler(browser_type="firefox", headless=True, verbose=True) as crawler: + result = await crawler.arun(url="https://www.example.com") + print(result.markdown[:500]) # Show first 500 characters + ``` + - This example uses Firefox in headless mode with logging enabled, demonstrating the flexibility of Crawl4AI’s setup. + +5) **Recap & Next Steps**: + + - Recap the power of selecting different browsers and running headless mode for speed and efficiency. + - Tease the next video: **Proxy & Security Settings** for navigating blocked or restricted content and protecting IP identity. + +--- + +This breakdown covers browser configuration essentials in Crawl4AI, providing users with practical steps to optimize their scraping setup.# Crawl4AI + +## Episode 4: Advanced Proxy and Security Settings + +### Quick Intro +Showcase proxy configurations (HTTP, SOCKS5, authenticated proxies). Demo: Use rotating proxies and set custom headers to avoid IP blocking and enhance security. + +Here’s a focused outline for the **Proxy and Security Settings** video: + +--- + +### **Proxy & Security Settings** + +1) **Why Use Proxies in Web Crawling**: + + - Proxies are essential for bypassing IP-based restrictions, improving anonymity, and managing rate limits. + - Crawl4AI supports simple proxies, authenticated proxies, and proxy rotation for robust web scraping. + +2) **Basic Proxy Setup**: + + - **Using a Simple Proxy**: + ```python + # HTTP proxy + crawler = AsyncWebCrawler(proxy="http://proxy.example.com:8080") + + # SOCKS proxy + crawler = AsyncWebCrawler(proxy="socks5://proxy.example.com:1080") + ``` + +3) **Authenticated Proxies**: + + - Use `proxy_config` for proxies requiring a username and password: + ```python + proxy_config = { + "server": "http://proxy.example.com:8080", + "username": "user", + "password": "pass" + } + crawler = AsyncWebCrawler(proxy_config=proxy_config) + ``` + +4) **Rotating Proxies**: + + - Rotating proxies helps avoid IP bans by switching IP addresses for each request: + ```python + async def get_next_proxy(): + # Define proxy rotation logic here + return {"server": "http://next.proxy.com:8080"} + + async with AsyncWebCrawler() as crawler: + for url in urls: + proxy = await get_next_proxy() + crawler.update_proxy(proxy) + result = await crawler.arun(url=url) + ``` + - This setup periodically switches the proxy for enhanced security and access. 
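+
+   A minimal sketch of one way to fill in the `get_next_proxy()` placeholder above, assuming you maintain your own proxy pool; the `PROXIES` list and the round-robin cycling are illustrative and not part of Crawl4AI:
+   ```python
+   import itertools
+
+   # Hypothetical proxy pool -- replace with your own servers or provider API.
+   PROXIES = [
+       {"server": "http://proxy1.example.com:8080"},
+       {"server": "http://proxy2.example.com:8080"},
+       {"server": "http://proxy3.example.com:8080"},
+   ]
+   _proxy_cycle = itertools.cycle(PROXIES)
+
+   async def get_next_proxy():
+       # Simple round-robin; add health checks or authentication as needed.
+       return next(_proxy_cycle)
+   ```
+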
+ +5) **Custom Headers for Additional Security**: + + - Set custom headers to mask the crawler’s identity and avoid detection: + ```python + headers = { + "X-Forwarded-For": "203.0.113.195", + "Accept-Language": "en-US,en;q=0.9", + "Cache-Control": "no-cache", + "Pragma": "no-cache" + } + crawler = AsyncWebCrawler(headers=headers) + ``` + +6) **Combining Proxies with Magic Mode for Anti-Bot Protection**: + + - For sites with aggressive bot detection, combine `proxy` settings with `magic=True`: + ```python + async with AsyncWebCrawler(proxy="http://proxy.example.com:8080", headers={"Accept-Language": "en-US"}) as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Enables anti-detection features + ) + ``` + - **Magic Mode** automatically enables user simulation, random timing, and browser property masking. + +7) **Wrap Up & Next Steps**: + + - Summarize the importance of proxies and anti-detection in accessing restricted content and avoiding bans. + - Tease the next video: **JavaScript Execution and Handling Dynamic Content** for working with interactive and dynamically loaded pages. + +--- + +This outline provides a practical guide to setting up proxies and security configurations, empowering users to navigate restricted sites while staying undetected.# Crawl4AI + +## Episode 5: JavaScript Execution and Dynamic Content Handling + +### Quick Intro +Explain JavaScript code injection with examples (e.g., simulating scrolling, clicking ‘load more’). Demo: Extract content from a page that uses dynamic loading with lazy-loaded images. + +Here’s a focused outline for the **JavaScript Execution and Dynamic Content Handling** video: + +--- + +### **JavaScript Execution & Dynamic Content Handling** + +1) **Why JavaScript Execution Matters**: + + - Many modern websites load content dynamically via JavaScript, requiring special handling to access all elements. + - Crawl4AI can execute JavaScript on pages, enabling it to interact with elements like “load more” buttons, infinite scrolls, and content that appears only after certain actions. + +2) **Basic JavaScript Execution**: + + - Use `js_code` to execute JavaScript commands on a page: + ```python + # Scroll to bottom of the page + result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);" + ) + ``` + - This command scrolls to the bottom, triggering any lazy-loaded or dynamically added content. + +3) **Multiple Commands & Simulating Clicks**: + + - Combine multiple JavaScript commands to interact with elements like “load more” buttons: + ```python + js_commands = [ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ] + result = await crawler.arun( + url="https://example.com", + js_code=js_commands + ) + ``` + - This script scrolls down and then clicks the “load more” button, useful for loading additional content blocks. + +4) **Waiting for Dynamic Content**: + + - Use `wait_for` to ensure the page loads specific elements before proceeding: + ```python + result = await crawler.arun( + url="https://example.com", + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.dynamic-content" # Wait for elements with class `.dynamic-content` + ) + ``` + - This example waits until elements with `.dynamic-content` are loaded, helping to capture content that appears after JavaScript actions. 
+ +5) **Handling Complex Dynamic Content (e.g., Infinite Scroll)**: + + - Combine JavaScript execution with conditional waiting to handle infinite scrolls or paginated content: + ```python + result = await crawler.arun( + url="https://example.com", + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "const loadMore = document.querySelector('.load-more'); if (loadMore) loadMore.click();" + ], + wait_for="js:() => document.querySelectorAll('.item').length > 10" # Wait until 10 items are loaded + ) + ``` + - This example scrolls and clicks "load more" repeatedly, waiting each time for a specified number of items to load. + +6) **Complete Example: Dynamic Content Handling with Extraction**: + + - Full example demonstrating a dynamic load and content extraction in one process: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + js_code=[ + "window.scrollTo(0, document.body.scrollHeight);", + "document.querySelector('.load-more').click();" + ], + wait_for="css:.main-content", + css_selector=".main-content" + ) + print(result.markdown[:500]) # Output the main content extracted + ``` + +7) **Wrap Up & Next Steps**: + + - Recap how JavaScript execution allows access to dynamic content, enabling powerful interactions. + - Tease the next video: **Content Cleaning and Fit Markdown** to show how Crawl4AI can extract only the most relevant content from complex pages. + +--- + +This outline explains how to handle dynamic content and JavaScript-based interactions effectively, enabling users to scrape and interact with complex, modern websites.# Crawl4AI + +## Episode 6: Magic Mode and Anti-Bot Protection + +### Quick Intro +Highlight `Magic Mode` and anti-bot features like user simulation, navigator overrides, and timing randomization. Demo: Access a site with anti-bot protection and show how `Magic Mode` seamlessly handles it. + +Here’s a concise outline for the **Magic Mode and Anti-Bot Protection** video: + +--- + +### **Magic Mode & Anti-Bot Protection** + +1) **Why Anti-Bot Protection is Important**: + + - Many websites use bot detection mechanisms to block automated scraping. Crawl4AI’s anti-detection features help avoid IP bans, CAPTCHAs, and access restrictions. + - **Magic Mode** is a one-step solution to enable a range of anti-bot features without complex configuration. + +2) **Enabling Magic Mode**: + + - Simply set `magic=True` to activate Crawl4AI’s full anti-bot suite: + ```python + result = await crawler.arun( + url="https://example.com", + magic=True # Enables all anti-detection features + ) + ``` + - This enables a blend of stealth techniques, including masking automation signals, randomizing timings, and simulating real user behavior. + +3) **What Magic Mode Does Behind the Scenes**: + + - **User Simulation**: Mimics human actions like mouse movements and scrolling. + - **Navigator Overrides**: Hides signals that indicate an automated browser. + - **Timing Randomization**: Adds random delays to simulate natural interaction patterns. + - **Cookie Handling**: Accepts and manages cookies dynamically to avoid triggers from cookie pop-ups. 
+ +4) **Manual Anti-Bot Options (If Not Using Magic Mode)**: + + - For granular control, you can configure individual settings without Magic Mode: + ```python + result = await crawler.arun( + url="https://example.com", + simulate_user=True, # Enables human-like behavior + override_navigator=True # Hides automation fingerprints + ) + ``` + - **Use Cases**: This approach allows more specific adjustments when certain anti-bot features are needed but others are not. + +5) **Combining Proxies with Magic Mode**: + + - To avoid rate limits or IP blocks, combine Magic Mode with a proxy: + ```python + async with AsyncWebCrawler( + proxy="http://proxy.example.com:8080", + headers={"Accept-Language": "en-US"} + ) as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True # Full anti-detection + ) + ``` + - This setup maximizes stealth by pairing anti-bot detection with IP obfuscation. + +6) **Example of Anti-Bot Protection in Action**: + + - Full example with Magic Mode and proxies to scrape a protected page: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com/protected-content", + magic=True, + proxy="http://proxy.example.com:8080", + wait_for="css:.content-loaded" # Wait for the main content to load + ) + print(result.markdown[:500]) # Display first 500 characters of the content + ``` + - This example ensures seamless access to protected content by combining anti-detection and waiting for full content load. + +7) **Wrap Up & Next Steps**: + + - Recap the power of Magic Mode and anti-bot features for handling restricted websites. + - Tease the next video: **Content Cleaning and Fit Markdown** to show how to extract clean and focused content from a page. + +--- + +This outline shows users how to easily avoid bot detection and access restricted content, demonstrating both the power and simplicity of Magic Mode in Crawl4AI.# Crawl4AI + +## Episode 7: Content Cleaning and Fit Markdown + +### Quick Intro +Explain content cleaning options, including `fit_markdown` to keep only the most relevant content. Demo: Extract and compare regular vs. fit markdown from a news site or blog. + +Here’s a streamlined outline for the **Content Cleaning and Fit Markdown** video: + +--- + +### **Content Cleaning & Fit Markdown** + +1) **Overview of Content Cleaning in Crawl4AI**: + + - Explain that web pages often include extra elements like ads, navigation bars, footers, and popups. + - Crawl4AI’s content cleaning features help extract only the main content, reducing noise and enhancing readability. + +2) **Basic Content Cleaning Options**: + + - **Removing Unwanted Elements**: Exclude specific HTML tags, like forms or navigation bars: + ```python + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Filter out blocks with fewer than 10 words + excluded_tags=['form', 'nav'], # Exclude specific tags + remove_overlay_elements=True # Remove popups and modals + ) + ``` + - This example extracts content while excluding forms, navigation, and modal overlays, ensuring clean results. + +3) **Fit Markdown for Main Content Extraction**: + + - **What is Fit Markdown**: Uses advanced analysis to identify the most relevant content (ideal for articles, blogs, and documentation). + - **How it Works**: Analyzes content density, removes boilerplate elements, and maintains formatting for a clear output. 
+ - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + main_content = result.fit_markdown # Extracted main content + print(main_content[:500]) # Display first 500 characters + ``` + - Fit Markdown is especially helpful for long-form content like news articles or blog posts. + +4) **Comparing Fit Markdown with Regular Markdown**: + + - **Fit Markdown** returns the primary content without extraneous elements. + - **Regular Markdown** includes all extracted text in markdown format. + - Example to show the difference: + ```python + all_content = result.markdown # Full markdown + main_content = result.fit_markdown # Only the main content + + print(f"All Content Length: {len(all_content)}") + print(f"Main Content Length: {len(main_content)}") + ``` + - This comparison shows the effectiveness of Fit Markdown in focusing on essential content. + +5) **Media and Metadata Handling with Content Cleaning**: + + - **Media Extraction**: Crawl4AI captures images and videos with metadata like alt text, descriptions, and relevance scores: + ```python + for image in result.media["images"]: + print(f"Source: {image['src']}, Alt Text: {image['alt']}, Relevance Score: {image['score']}") + ``` + - **Use Case**: Useful for saving only relevant images or videos from an article or content-heavy page. + +6) **Example of Clean Content Extraction in Action**: + + - Full example extracting cleaned content and Fit Markdown: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, + excluded_tags=['nav', 'footer'], + remove_overlay_elements=True + ) + print(result.fit_markdown[:500]) # Show main content + ``` + - This example demonstrates content cleaning with settings for filtering noise and focusing on the core text. + +7) **Wrap Up & Next Steps**: + + - Summarize the power of Crawl4AI’s content cleaning features and Fit Markdown for capturing clean, relevant content. + - Tease the next video: **Link Analysis and Smart Filtering** to focus on analyzing and filtering links within crawled pages. + +--- + +This outline covers Crawl4AI’s content cleaning features and the unique benefits of Fit Markdown, showing users how to retrieve focused, high-quality content from web pages.# Crawl4AI + +## Episode 8: Media Handling: Images, Videos, and Audio + +### Quick Intro +Showcase Crawl4AI’s media extraction capabilities, including lazy-loaded media and metadata. Demo: Crawl a multimedia page, extract images, and show metadata (alt text, context, relevance score). + +Here’s a clear and focused outline for the **Media Handling: Images, Videos, and Audio** video: + +--- + +### **Media Handling: Images, Videos, and Audio** + +1) **Overview of Media Extraction in Crawl4AI**: + + - Crawl4AI can detect and extract different types of media (images, videos, and audio) along with useful metadata. + - This functionality is essential for gathering visual content from multimedia-heavy pages like e-commerce sites, news articles, and social media feeds. + +2) **Image Extraction and Metadata**: + + - Crawl4AI captures images with detailed metadata, including: + - **Source URL**: The direct URL to the image. + - **Alt Text**: Image description if available. + - **Relevance Score**: A score (0–10) indicating how relevant the image is to the main content. + - **Context**: Text surrounding the image on the page. 
+ - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + + for image in result.media["images"]: + print(f"Source: {image['src']}") + print(f"Alt Text: {image['alt']}") + print(f"Relevance Score: {image['score']}") + print(f"Context: {image['context']}") + ``` + - This example shows how to access each image’s metadata, making it easy to filter for the most relevant visuals. + +3) **Handling Lazy-Loaded Images**: + + - Crawl4AI automatically supports lazy-loaded images, which are commonly used to optimize webpage loading. + - **Example with Wait for Lazy-Loaded Content**: + ```python + result = await crawler.arun( + url="https://example.com", + wait_for="css:img[data-src]", # Wait for lazy-loaded images + delay_before_return_html=2.0 # Allow extra time for images to load + ) + ``` + - This setup waits for lazy-loaded images to appear, ensuring they are fully captured. + +4) **Video Extraction and Metadata**: + + - Crawl4AI captures video elements, including: + - **Source URL**: The video’s direct URL. + - **Type**: Format of the video (e.g., MP4). + - **Thumbnail**: A poster or thumbnail image if available. + - **Duration**: Video length, if metadata is provided. + - **Example**: + ```python + for video in result.media["videos"]: + print(f"Video Source: {video['src']}") + print(f"Type: {video['type']}") + print(f"Thumbnail: {video.get('poster')}") + print(f"Duration: {video.get('duration')}") + ``` + - This allows users to gather video content and relevant details for further processing or analysis. + +5) **Audio Extraction and Metadata**: + + - Audio elements can also be extracted, with metadata like: + - **Source URL**: The audio file’s direct URL. + - **Type**: Format of the audio file (e.g., MP3). + - **Duration**: Length of the audio, if available. + - **Example**: + ```python + for audio in result.media["audios"]: + print(f"Audio Source: {audio['src']}") + print(f"Type: {audio['type']}") + print(f"Duration: {audio.get('duration')}") + ``` + - Useful for sites with podcasts, sound bites, or other audio content. + +6) **Filtering Media by Relevance**: + + - Use metadata like relevance score to filter only the most useful media content: + ```python + relevant_images = [img for img in result.media["images"] if img['score'] > 5] + ``` + - This is especially helpful for content-heavy pages where you only want media directly related to the main content. + +7) **Example: Full Media Extraction with Content Filtering**: + + - Full example extracting images, videos, and audio along with filtering by relevance: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + word_count_threshold=10, # Filter content blocks for relevance + exclude_external_images=True # Only keep internal images + ) + + # Display media summaries + print(f"Relevant Images: {len(relevant_images)}") + print(f"Videos: {len(result.media['videos'])}") + print(f"Audio Clips: {len(result.media['audios'])}") + ``` + - This example shows how to capture and filter various media types, focusing on what’s most relevant. + +8) **Wrap Up & Next Steps**: + + - Recap the comprehensive media extraction capabilities, emphasizing how metadata helps users focus on relevant content. + - Tease the next video: **Link Analysis and Smart Filtering** to explore how Crawl4AI handles internal, external, and social media links for more focused data gathering. 
+ +--- + +This outline provides users with a complete guide to handling images, videos, and audio in Crawl4AI, using metadata to enhance relevance and precision in multimedia extraction.# Crawl4AI + +## Episode 9: Link Analysis and Smart Filtering + +### Quick Intro +Walk through internal and external link classification, social media link filtering, and custom domain exclusion. Demo: Analyze links on a website, focusing on internal navigation vs. external or ad links. + +Here’s a focused outline for the **Link Analysis and Smart Filtering** video: + +--- + +### **Link Analysis & Smart Filtering** + +1) **Importance of Link Analysis in Web Crawling**: + + - Explain that web pages often contain numerous links, including internal links, external links, social media links, and ads. + - Crawl4AI’s link analysis and filtering options help extract only relevant links, enabling more targeted and efficient crawls. + +2) **Automatic Link Classification**: + + - Crawl4AI categorizes links automatically into internal, external, and social media links. + - **Example**: + ```python + result = await crawler.arun(url="https://example.com") + + # Access internal and external links + internal_links = result.links["internal"] + external_links = result.links["external"] + + # Print first few links for each type + print("Internal Links:", internal_links[:3]) + print("External Links:", external_links[:3]) + ``` + +3) **Filtering Out Unwanted Links**: + + - **Exclude External Links**: Remove all links pointing to external sites. + - **Exclude Social Media Links**: Filter out social media domains like Facebook or Twitter. + - **Example**: + ```python + result = await crawler.arun( + url="https://example.com", + exclude_external_links=True, # Remove external links + exclude_social_media_links=True # Remove social media links + ) + ``` + +4) **Custom Domain Filtering**: + + - **Exclude Specific Domains**: Filter links from particular domains, e.g., ad sites. + - **Custom Social Media Domains**: Add additional social media domains if needed. + - **Example**: + ```python + result = await crawler.arun( + url="https://example.com", + exclude_domains=["ads.com", "trackers.com"], + exclude_social_media_domains=["facebook.com", "linkedin.com"] + ) + ``` + +5) **Accessing Link Context and Metadata**: + + - Crawl4AI provides additional metadata for each link, including its text, type (e.g., navigation or content), and surrounding context. + - **Example**: + ```python + for link in result.links["internal"]: + print(f"Link: {link['href']}, Text: {link['text']}, Context: {link['context']}") + ``` + - **Use Case**: Helps users understand the relevance of links based on where they are placed on the page (e.g., navigation vs. article content). + +6) **Example of Comprehensive Link Filtering and Analysis**: + + - Full example combining link filtering, metadata access, and contextual information: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + exclude_external_links=True, + exclude_social_media_links=True, + exclude_domains=["ads.com"], + css_selector=".main-content" # Focus only on main content area + ) + for link in result.links["internal"]: + print(f"Internal Link: {link['href']}, Text: {link['text']}, Context: {link['context']}") + ``` + - This example filters unnecessary links, keeping only internal and relevant links from the main content area. 
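   - **Optional post-processing sketch**: Because each link is returned as a plain dictionary, the extracted links can also be analyzed with ordinary Python. This hedged example assumes a crawl that keeps external links (no `exclude_external_links`) and the `href` key shown above; it counts external links per domain to suggest candidates for `exclude_domains`:
    ```python
    import asyncio
    from collections import Counter
    from urllib.parse import urlparse
    from crawl4ai import AsyncWebCrawler

    async def summarize_external_domains(url):
        async with AsyncWebCrawler() as crawler:
            # Crawl without exclude_external_links so external links are kept
            result = await crawler.arun(url=url)

        # Count external links per domain using the 'href' key shown above
        domain_counts = Counter(
            urlparse(link["href"]).netloc for link in result.links["external"]
        )

        # The noisiest domains are good candidates for exclude_domains
        for domain, count in domain_counts.most_common(5):
            print(f"{domain}: {count} links")

    asyncio.run(summarize_external_domains("https://example.com"))
    ```
   - Feeding the noisiest domains back into `exclude_domains` on a second crawl is a simple way to tune filtering for a specific site.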
+ +7) **Wrap Up & Next Steps**: + + - Summarize the benefits of link filtering for efficient crawling and relevant content extraction. + - Tease the next video: **Custom Headers, Identity Management, and User Simulation** to explain how to configure identity settings and simulate user behavior for stealthier crawls. + +--- + +This outline provides a practical overview of Crawl4AI’s link analysis and filtering features, helping users target only essential links while eliminating distractions.# Crawl4AI + +## Episode 10: Custom Headers, Identity, and User Simulation + +### Quick Intro +Teach how to use custom headers, user-agent strings, and simulate real user interactions. Demo: Set custom user-agent and headers to access a site that blocks typical crawlers. + +Here’s a concise outline for the **Custom Headers, Identity Management, and User Simulation** video: + +--- + +### **Custom Headers, Identity Management, & User Simulation** + +1) **Why Customize Headers and Identity in Crawling**: + + - Websites often track request headers and browser properties to detect bots. Customizing headers and managing identity help make requests appear more human, improving access to restricted sites. + +2) **Setting Custom Headers**: + + - Customize HTTP headers to mimic genuine browser requests or meet site-specific requirements: + ```python + headers = { + "Accept-Language": "en-US,en;q=0.9", + "X-Requested-With": "XMLHttpRequest", + "Cache-Control": "no-cache" + } + crawler = AsyncWebCrawler(headers=headers) + ``` + - **Use Case**: Customize the `Accept-Language` header to simulate local user settings, or `Cache-Control` to bypass cache for fresh content. + +3) **Setting a Custom User Agent**: + + - Some websites block requests from common crawler user agents. Setting a custom user agent string helps bypass these restrictions: + ```python + crawler = AsyncWebCrawler( + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + ) + ``` + - **Tip**: Use user-agent strings from popular browsers (e.g., Chrome, Firefox) to improve access and reduce detection risks. + +4) **User Simulation for Human-like Behavior**: + + - Enable `simulate_user=True` to mimic natural user interactions, such as random timing and simulated mouse movements: + ```python + result = await crawler.arun( + url="https://example.com", + simulate_user=True # Simulates human-like behavior + ) + ``` + - **Behavioral Effects**: Adds subtle variations in interactions, making the crawler harder to detect on bot-protected sites. + +5) **Navigator Overrides and Magic Mode for Full Identity Masking**: + + - Use `override_navigator=True` to mask automation indicators like `navigator.webdriver`, which websites check to detect bots: + ```python + result = await crawler.arun( + url="https://example.com", + override_navigator=True # Masks bot-related signals + ) + ``` + - **Combining with Magic Mode**: For a complete anti-bot setup, combine these identity options with `magic=True` for maximum protection: + ```python + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com", + magic=True, # Enables all anti-bot detection features + user_agent="Custom-Agent", # Custom agent with Magic Mode + ) + ``` + - This setup includes all anti-detection techniques like navigator masking, random timing, and user simulation. 
+ +6) **Example: Comprehensive Setup for Identity Management**: + + - A full example combining custom headers, user-agent, and user simulation for a realistic browsing profile: + ```python + async with AsyncWebCrawler( + headers={"Accept-Language": "en-US", "Cache-Control": "no-cache"}, + user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/91.0", + ) as crawler: + result = await crawler.arun( + url="https://example.com/secure-page", + simulate_user=True + ) + print(result.markdown[:500]) # Display extracted content + ``` + - This example enables detailed customization for evading detection and accessing protected pages smoothly. + +7) **Wrap Up & Next Steps**: + + - Recap the value of headers, user-agent customization, and simulation in bypassing bot detection. + - Tease the next video: **Extraction Strategies: JSON CSS, LLM, and Cosine** to dive into structured data extraction methods for high-quality content retrieval. + +--- + +This outline equips users with tools for managing crawler identity and human-like behavior, essential for accessing bot-protected or restricted websites.Here’s a detailed outline for the **JSON-CSS Extraction Strategy** video, covering all key aspects and supported structures in Crawl4AI: + +--- + +### **10.1 JSON-CSS Extraction Strategy** + +#### **1. Introduction to JSON-CSS Extraction** + - JSON-CSS Extraction is used for pulling structured data from pages with repeated patterns, like product listings, article feeds, or directories. + - This strategy allows defining a schema with CSS selectors and data fields, making it easy to capture nested, list-based, or singular elements. + +#### **2. Basic Schema Structure** + - **Schema Fields**: The schema has two main components: + - `baseSelector`: A CSS selector to locate the main elements you want to extract (e.g., each article or product block). + - `fields`: Defines the data fields for each element, supporting various data types and structures. + +#### **3. Simple Field Extraction** + - **Example HTML**: + ```html +
    <div class="product">
      <h2 class="title">Sample Product</h2>
      <span class="price">$19.99</span>
      <p class="description">This is a sample product.</p>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "price", "selector": ".price", "type": "text"}, + {"name": "description", "selector": ".description", "type": "text"} + ] + } + ``` + - **Explanation**: Each field captures text content from specified CSS selectors within each `.product` element. + +#### **4. Supported Field Types: Text, Attribute, HTML, Regex** + - **Field Type Options**: + - `text`: Extracts visible text. + - `attribute`: Captures an HTML attribute (e.g., `src`, `href`). + - `html`: Extracts the raw HTML of an element. + - `regex`: Allows regex patterns to extract part of the text. + + - **Example HTML** (including an image): + ```html +
    <div class="product">
      <h2 class="title">Sample Product</h2>
      <img class="product-image" src="product-image.jpg" alt="Product Image">
      <span class="price">$19.99</span>
      <p class="description">Limited time offer.</p>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "image_url", "selector": ".product-image", "type": "attribute", "attribute": "src"}, + {"name": "price", "selector": ".price", "type": "regex", "pattern": r"\$(\d+\.\d+)"}, + {"name": "description_html", "selector": ".description", "type": "html"} + ] + } + ``` + - **Explanation**: + - `attribute`: Extracts the `src` attribute from `.product-image`. + - `regex`: Extracts the numeric part from `$19.99`. + - `html`: Retrieves the full HTML of the description element. + +#### **5. Nested Field Extraction** + - **Use Case**: Useful when content contains sub-elements, such as an article with author details within it. + - **Example HTML**: + ```html +
    <div class="article">
      <h1 class="title">Sample Article</h1>
      <div class="author">
        <span class="name">John Doe</span>
        <p class="bio">Writer and editor</p>
      </div>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".article", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "author", "type": "nested", "selector": ".author", "fields": [ + {"name": "name", "selector": ".name", "type": "text"}, + {"name": "bio", "selector": ".bio", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: + - `nested`: Extracts `name` and `bio` within `.author`, grouping the author details in a single `author` object. + +#### **6. List and Nested List Extraction** + - **List**: Extracts multiple elements matching the selector as a list. + - **Nested List**: Allows lists within lists, useful for items with sub-lists (e.g., specifications for each product). + - **Example HTML**: + ```html +
    <div class="product">
      <h2 class="title">Product with Features</h2>
      <ul class="features">
        <li class="feature">Feature 1</li>
        <li class="feature">Feature 2</li>
        <li class="feature">Feature 3</li>
      </ul>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text"}, + {"name": "features", "type": "list", "selector": ".features .feature", "fields": [ + {"name": "feature", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: + - `list`: Captures each `.feature` item within `.features`, outputting an array of features under the `features` field. + +#### **7. Transformations for Field Values** + - Transformations allow you to modify extracted values (e.g., converting to lowercase). + - Supported transformations: `lowercase`, `uppercase`, `strip`. + - **Example HTML**: + ```html +
    <div class="product">
      <h2 class="title">Special Product</h2>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text", "transform": "uppercase"} + ] + } + ``` + - **Explanation**: The `transform` property changes the `title` to uppercase, useful for standardized outputs. + +#### **8. Full JSON-CSS Extraction Example** + - Combining all elements in a single schema example for a comprehensive crawl: + - **Example HTML**: + ```html +
    <div class="product">
      <h2 class="title">Featured Product</h2>
      <img class="product-image" src="featured-product.jpg" alt="Featured Product">
      <span class="price">$99.99</span>
      <p class="description">Best product of the year.</p>
      <ul class="features">
        <li class="feature">Durable</li>
        <li class="feature">Eco-friendly</li>
      </ul>
    </div>
    + ``` + - **Schema**: + ```python + schema = { + "baseSelector": ".product", + "fields": [ + {"name": "title", "selector": ".title", "type": "text", "transform": "uppercase"}, + {"name": "image_url", "selector": ".product-image", "type": "attribute", "attribute": "src"}, + {"name": "price", "selector": ".price", "type": "regex", "pattern": r"\$(\d+\.\d+)"}, + {"name": "description", "selector": ".description", "type": "html"}, + {"name": "features", "type": "list", "selector": ".features .feature", "fields": [ + {"name": "feature", "type": "text"} + ]} + ] + } + ``` + - **Explanation**: This schema captures and transforms each aspect of the product, illustrating the JSON-CSS strategy’s versatility for structured extraction. + +#### **9. Wrap Up & Next Steps** + - Summarize JSON-CSS Extraction’s flexibility for structured, pattern-based extraction. + - Tease the next video: **10.2 LLM Extraction Strategy**, focusing on using language models to extract data based on intelligent content analysis. + +--- + +This outline covers each JSON-CSS Extraction option in Crawl4AI, with practical examples and schema configurations, making it a thorough guide for users.# Crawl4AI + +## Episode 11: Extraction Strategies: JSON CSS, LLM, and Cosine + +### Quick Intro +Introduce JSON CSS Extraction Strategy for structured data, LLM Extraction Strategy for intelligent parsing, and Cosine Strategy for clustering similar content. Demo: Use JSON CSS to scrape product details from an e-commerce site. + +Here’s a comprehensive outline for the **LLM Extraction Strategy** video, covering key details and example applications. + +--- + +### **10.2 LLM Extraction Strategy** + +#### **1. Introduction to LLM Extraction Strategy** + - The LLM Extraction Strategy leverages language models to interpret and extract structured data from complex web content. + - Unlike traditional CSS selectors, this strategy uses natural language instructions and schemas to guide the extraction, ideal for unstructured or diverse content. + - Supports **OpenAI**, **Azure OpenAI**, **HuggingFace**, and **Ollama** models, enabling flexibility with both proprietary and open-source providers. + +#### **2. Key Components of LLM Extraction Strategy** + - **Provider**: Specifies the LLM provider (e.g., OpenAI, HuggingFace, Azure). + - **API Token**: Required for most providers, except Ollama (local LLM model). + - **Instruction**: Custom extraction instructions sent to the model, providing flexibility in how the data is structured and extracted. + - **Schema**: Optional, defines structured fields to organize extracted data into JSON format. + - **Extraction Type**: Supports `"block"` for simpler text blocks or `"schema"` when a structured output format is required. + - **Chunking Parameters**: Breaks down large documents, with options to adjust chunk size and overlap rate for more accurate extraction across lengthy texts. + +#### **3. Basic Extraction Example: OpenAI Model Pricing** + - **Goal**: Extract model names and their input and output fees from the OpenAI pricing page. + - **Schema Definition**: + - **Model Name**: Text for model identification. + - **Input Fee**: Token cost for input processing. + - **Output Fee**: Token cost for output generation. 
+ + - **Schema**: + ```python + class OpenAIModelFee(BaseModel): + model_name: str = Field(..., description="Name of the OpenAI model.") + input_fee: str = Field(..., description="Fee for input token for the OpenAI model.") + output_fee: str = Field(..., description="Fee for output token for the OpenAI model.") + ``` + + - **Example Code**: + ```python + async def extract_openai_pricing(): + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://openai.com/api/pricing/", + extraction_strategy=LLMExtractionStrategy( + provider="openai/gpt-4o", + api_token=os.getenv("OPENAI_API_KEY"), + schema=OpenAIModelFee.schema(), + extraction_type="schema", + instruction="Extract model names and fees for input and output tokens from the page." + ), + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - The extraction strategy combines a schema and detailed instruction to guide the LLM in capturing structured data. + - Each model’s name, input fee, and output fee are extracted in a JSON format. + +#### **4. Knowledge Graph Extraction Example** + - **Goal**: Extract entities and their relationships from a document for use in a knowledge graph. + - **Schema Definition**: + - **Entities**: Individual items with descriptions (e.g., people, organizations). + - **Relationships**: Connections between entities, including descriptions and relationship types. + + - **Schema**: + ```python + class Entity(BaseModel): + name: str + description: str + + class Relationship(BaseModel): + entity1: Entity + entity2: Entity + description: str + relation_type: str + + class KnowledgeGraph(BaseModel): + entities: List[Entity] + relationships: List[Relationship] + ``` + + - **Example Code**: + ```python + async def extract_knowledge_graph(): + extraction_strategy = LLMExtractionStrategy( + provider="azure/gpt-4o-mini", + api_token=os.getenv("AZURE_API_KEY"), + schema=KnowledgeGraph.schema(), + extraction_type="schema", + instruction="Extract entities and relationships from the content to build a knowledge graph." + ) + async with AsyncWebCrawler() as crawler: + result = await crawler.arun( + url="https://example.com/some-article", + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - In this setup, the LLM extracts entities and their relationships based on the schema and instruction. + - The schema organizes results into a JSON-based knowledge graph format. + +#### **5. Key Settings in LLM Extraction** + - **Chunking Options**: + - For long pages, set `chunk_token_threshold` to specify maximum token count per section. + - Adjust `overlap_rate` to control the overlap between chunks, useful for contextual consistency. + - **Example**: + ```python + extraction_strategy = LLMExtractionStrategy( + provider="openai/gpt-4", + api_token=os.getenv("OPENAI_API_KEY"), + chunk_token_threshold=3000, + overlap_rate=0.2, # 20% overlap between chunks + instruction="Extract key insights and relationships." + ) + ``` + - This setup ensures that longer texts are divided into manageable chunks with slight overlap, enhancing the quality of extraction. + +#### **6. Flexible Provider Options for LLM Extraction** + - **Using Proprietary Models**: OpenAI, Azure, and HuggingFace provide robust language models, often suited for complex or detailed extractions. 
+ - **Using Open-Source Models**: Ollama and other open-source models can be deployed locally, suitable for offline or cost-effective extraction. + - **Example Call**: + ```python + await extract_structured_data_using_llm("huggingface/meta-llama/Meta-Llama-3.1-8B-Instruct", os.getenv("HUGGINGFACE_API_KEY")) + await extract_structured_data_using_llm("openai/gpt-4o", os.getenv("OPENAI_API_KEY")) + await extract_structured_data_using_llm("ollama/llama3.2") + ``` + +#### **7. Complete Example of LLM Extraction Setup** + - Code to run both the OpenAI pricing and Knowledge Graph extractions, using various providers: + ```python + async def main(): + await extract_openai_pricing() + await extract_knowledge_graph() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **8. Wrap Up & Next Steps** + - Recap the power of LLM extraction for handling unstructured or complex data extraction tasks. + - Tease the next video: **10.3 Cosine Similarity Strategy** for clustering similar content based on semantic similarity. + +--- + +This outline explains LLM Extraction in Crawl4AI, with examples showing how to extract structured data using custom schemas and instructions. It demonstrates flexibility with multiple providers, ensuring practical application for different use cases.# Crawl4AI + +## Episode 11: Extraction Strategies: JSON CSS, LLM, and Cosine + +### Quick Intro +Introduce JSON CSS Extraction Strategy for structured data, LLM Extraction Strategy for intelligent parsing, and Cosine Strategy for clustering similar content. Demo: Use JSON CSS to scrape product details from an e-commerce site. + +Here’s a structured outline for the **Cosine Similarity Strategy** video, covering key concepts, configuration, and a practical example. + +--- + +### **10.3 Cosine Similarity Strategy** + +#### **1. Introduction to Cosine Similarity Strategy** + - The Cosine Similarity Strategy clusters content by semantic similarity, offering an efficient alternative to LLM-based extraction, especially when speed is a priority. + - Ideal for grouping similar sections of text, this strategy is well-suited for pages with content sections that may need to be classified or tagged, like news articles, product descriptions, or reviews. + +#### **2. Key Configuration Options** + - **semantic_filter**: A keyword-based filter to focus on relevant content. + - **word_count_threshold**: Minimum number of words per cluster, filtering out shorter, less meaningful clusters. + - **max_dist**: Maximum allowable distance between elements in clusters, impacting cluster tightness. + - **linkage_method**: Method for hierarchical clustering, such as `'ward'` (for well-separated clusters). + - **top_k**: Specifies the number of top categories for each cluster. + - **model_name**: Defines the model for embeddings, such as `sentence-transformers/all-MiniLM-L6-v2`. + - **sim_threshold**: Minimum similarity threshold for filtering, allowing control over cluster relevance. + +#### **3. How Cosine Similarity Clustering Works** + - **Step 1**: Embeddings are generated for each text section, transforming them into vectors that capture semantic meaning. + - **Step 2**: Hierarchical clustering groups similar sections based on cosine similarity, forming clusters with related content. + - **Step 3**: Clusters are filtered based on word count, removing those below the `word_count_threshold`. + - **Step 4**: Each cluster is then categorized with tags, if enabled, providing context to each grouped content section. + +#### **4. 
Example Use Case: Clustering Blog Article Sections** + - **Goal**: Group related sections of a blog or news page to identify distinct topics or discussion areas. + - **Example HTML Sections**: + ```text + "The economy is showing signs of recovery, with markets up this quarter.", + "In the sports world, several major teams are preparing for the upcoming season.", + "New advancements in AI technology are reshaping the tech landscape.", + "Market analysts are optimistic about continued growth in tech stocks." + ``` + + - **Code Setup**: + ```python + async def extract_blog_sections(): + extraction_strategy = CosineStrategy( + word_count_threshold=15, + max_dist=0.3, + sim_threshold=0.2, + model_name="sentence-transformers/all-MiniLM-L6-v2", + top_k=2 + ) + async with AsyncWebCrawler() as crawler: + url = "https://example.com/blog-page" + result = await crawler.arun( + url=url, + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - **word_count_threshold**: Ensures only clusters with meaningful content are included. + - **sim_threshold**: Filters out clusters with low similarity, focusing on closely related sections. + - **top_k**: Selects top tags, useful for identifying main topics. + +#### **5. Applying Semantic Filtering with Cosine Similarity** + - **Semantic Filter**: Filters sections based on relevance to a specific keyword, such as “technology” for tech articles. + - **Example Code**: + ```python + extraction_strategy = CosineStrategy( + semantic_filter="technology", + word_count_threshold=10, + max_dist=0.25, + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + ``` + - **Explanation**: + - **semantic_filter**: Only sections with high similarity to the “technology” keyword will be included in the clustering, making it easy to focus on specific topics within a mixed-content page. + +#### **6. Clustering Product Reviews by Similarity** + - **Goal**: Organize product reviews by themes, such as “price,” “quality,” or “durability.” + - **Example Reviews**: + ```text + "The quality of this product is outstanding and well worth the price.", + "I found the product to be durable but a bit overpriced.", + "Great value for the money and long-lasting.", + "The build quality is good, but I expected a lower price point." + ``` + + - **Code Setup**: + ```python + async def extract_product_reviews(): + extraction_strategy = CosineStrategy( + word_count_threshold=20, + max_dist=0.35, + sim_threshold=0.25, + model_name="sentence-transformers/all-MiniLM-L6-v2" + ) + async with AsyncWebCrawler() as crawler: + url = "https://example.com/product-reviews" + result = await crawler.arun( + url=url, + extraction_strategy=extraction_strategy, + cache_mode=CacheMode.BYPASS + ) + print(result.extracted_content) + ``` + + - **Explanation**: + - This configuration clusters similar reviews, grouping feedback by common themes, helping businesses understand customer sentiments around particular product aspects. + +#### **7. Performance Advantages of Cosine Strategy** + - **Speed**: The Cosine Similarity Strategy is faster than LLM-based extraction, as it doesn’t rely on API calls to external LLMs. + - **Local Processing**: The strategy runs locally with pre-trained sentence embeddings, ideal for high-throughput scenarios where cost and latency are concerns. 
+ - **Comparison**: With a well-optimized local model, this method can perform clustering on large datasets quickly, making it suitable for tasks requiring rapid, repeated analysis. + +#### **8. Full Code Example for Clustering News Articles** + - **Code**: + ```python + async def main(): + await extract_blog_sections() + await extract_product_reviews() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **9. Wrap Up & Next Steps** + - Recap the efficiency and effectiveness of Cosine Similarity for clustering related content quickly. + - Close with a reminder of Crawl4AI’s flexibility across extraction strategies, and prompt users to experiment with different settings to optimize clustering for their specific content. + +--- + +This outline covers Cosine Similarity Strategy’s speed and effectiveness, providing examples that showcase its potential for clustering various content types efficiently.# Crawl4AI + +## Episode 12: Session-Based Crawling for Dynamic Websites + +### Quick Intro +Show session management for handling websites with multiple pages or actions (like “load more” buttons). Demo: Crawl a paginated content page, persisting session data across multiple requests. + +Here’s a detailed outline for the **Session-Based Crawling for Dynamic Websites** video, explaining why sessions are necessary, how to use them, and providing practical examples and a visual diagram to illustrate the concept. + +--- + +### **11. Session-Based Crawling for Dynamic Websites** + +#### **1. Introduction to Session-Based Crawling** + - **What is Session-Based Crawling**: Session-based crawling maintains a continuous browsing session across multiple page states, allowing the crawler to interact with a page and retrieve content that loads dynamically or based on user interactions. + - **Why It’s Needed**: + - In static pages, all content is available directly from a single URL. + - In dynamic websites, content often loads progressively or based on user actions (e.g., clicking “load more,” submitting forms, scrolling). + - Session-based crawling helps simulate user actions, capturing content that is otherwise hidden until specific actions are taken. + +#### **2. Conceptual Diagram for Session-Based Crawling** + + ```mermaid + graph TD + Start[Start Session] --> S1[Initial State (S1)] + S1 -->|Crawl| Content1[Extract Content S1] + S1 -->|Action: Click Load More| S2[State S2] + S2 -->|Crawl| Content2[Extract Content S2] + S2 -->|Action: Scroll Down| S3[State S3] + S3 -->|Crawl| Content3[Extract Content S3] + S3 -->|Action: Submit Form| S4[Final State] + S4 -->|Crawl| Content4[Extract Content S4] + Content4 --> End[End Session] + ``` + + - **Explanation of Diagram**: + - **Start**: Initializes the session and opens the starting URL. + - **State Transitions**: Each action (e.g., clicking “load more,” scrolling) transitions to a new state, where additional content becomes available. + - **Session Persistence**: Keeps the same browsing session active, preserving the state and allowing for a sequence of actions to unfold. + - **End**: After reaching the final state, the session ends, and all accumulated content has been extracted. + +#### **3. Key Components of Session-Based Crawling in Crawl4AI** + - **Session ID**: A unique identifier to maintain the state across requests, allowing the crawler to “remember” previous actions. + - **JavaScript Execution**: Executes JavaScript commands (e.g., clicks, scrolls) to simulate interactions. 
+ - **Wait Conditions**: Ensures the crawler waits for content to load in each state before moving on. + - **Sequential State Transitions**: By defining actions and wait conditions between states, the crawler can navigate through the page as a user would. + +#### **4. Basic Session Example: Multi-Step Content Loading** + - **Goal**: Crawl an article feed that requires several “load more” clicks to display additional content. + - **Code**: + ```python + async def crawl_article_feed(): + async with AsyncWebCrawler() as crawler: + session_id = "feed_session" + + for page in range(3): + result = await crawler.arun( + url="https://example.com/articles", + session_id=session_id, + js_code="document.querySelector('.load-more-button').click();" if page > 0 else None, + wait_for="css:.article", + css_selector=".article" # Target article elements + ) + print(f"Page {page + 1}: Extracted {len(result.extracted_content)} articles") + ``` + - **Explanation**: + - **session_id**: Ensures all requests share the same browsing state. + - **js_code**: Clicks the “load more” button after the initial page load, expanding content on each iteration. + - **wait_for**: Ensures articles have loaded after each click before extraction. + +#### **5. Advanced Example: E-Commerce Product Search with Filter Selection** + - **Goal**: Interact with filters on an e-commerce page to extract products based on selected criteria. + - **Example Steps**: + 1. **State 1**: Load the main product page. + 2. **State 2**: Apply a filter (e.g., “On Sale”) by selecting a checkbox. + 3. **State 3**: Scroll to load additional products and capture updated results. + + - **Code**: + ```python + async def extract_filtered_products(): + async with AsyncWebCrawler() as crawler: + session_id = "product_session" + + # Step 1: Open product page + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + wait_for="css:.product-item" + ) + + # Step 2: Apply filter (e.g., "On Sale") + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + js_code="document.querySelector('#sale-filter-checkbox').click();", + wait_for="css:.product-item" + ) + + # Step 3: Scroll to load additional products + for _ in range(2): # Scroll down twice + result = await crawler.arun( + url="https://example.com/products", + session_id=session_id, + js_code="window.scrollTo(0, document.body.scrollHeight);", + wait_for="css:.product-item" + ) + print(f"Loaded {len(result.extracted_content)} products after scroll") + ``` + - **Explanation**: + - **State Persistence**: Each action (filter selection and scroll) builds on the previous session state. + - **Multiple Interactions**: Combines clicking a filter with scrolling, demonstrating how the session preserves these actions. + +#### **6. Key Benefits of Session-Based Crawling** + - **Accessing Hidden Content**: Retrieves data that loads only after user actions. + - **Simulating User Behavior**: Handles interactive elements such as “load more” buttons, dropdowns, and filters. + - **Maintaining Continuity Across States**: Enables a sequential process, moving logically from one state to the next, capturing all desired content without reloading the initial state each time. + +#### **7. Additional Configuration Tips** + - **Manage Session End**: Always conclude the session after the final state to release resources. + - **Optimize with Wait Conditions**: Use `wait_for` to ensure complete loading before each extraction. 
+ - **Handling Errors in Session-Based Crawling**: Include error handling for interactions that may fail, ensuring robustness across state transitions. + +#### **8. Complete Code Example: Multi-Step Session Workflow** + - **Example**: + ```python + async def main(): + await crawl_article_feed() + await extract_filtered_products() + + if __name__ == "__main__": + asyncio.run(main()) + ``` + +#### **9. Wrap Up & Next Steps** + - Recap the usefulness of session-based crawling for dynamic content extraction. + - Tease the next video: **Hooks and Custom Workflow with AsyncWebCrawler** to cover advanced customization options for further control over the crawling process. + +--- + +This outline covers session-based crawling from both a conceptual and practical perspective, helping users understand its importance, configure it effectively, and use it to handle complex dynamic content.# Crawl4AI + +## Episode 13: Chunking Strategies for Large Text Processing + +### Quick Intro +Explain Regex, NLP, and Fixed-Length chunking, and when to use each. Demo: Chunk a large article or document for processing by topics or sentences. + +Here’s a structured outline for the **Chunking Strategies for Large Text Processing** video, emphasizing how chunking works within extraction and why it’s crucial for effective data aggregation. + +Here’s a structured outline for the **Chunking Strategies for Large Text Processing** video, explaining each strategy, when to use it, and providing examples to illustrate. + +--- + +### **12. Chunking Strategies for Large Text Processing** + +#### **1. Introduction to Chunking in Crawl4AI** + - **What is Chunking**: Chunking is the process of dividing large text into manageable sections or “chunks,” enabling efficient processing in extraction tasks. + - **Why It’s Needed**: + - When processing large text, feeding it directly into an extraction function (like `F(x)`) can overwhelm memory or token limits. + - Chunking breaks down `x` (the text) into smaller pieces, which are processed sequentially or in parallel by the extraction function, with the final result being an aggregation of all chunks’ processed output. + +#### **2. Key Chunking Strategies and Use Cases** + - Crawl4AI offers various chunking strategies to suit different text structures, chunk sizes, and processing requirements. + - **Choosing a Strategy**: Select based on the type of text (e.g., articles, transcripts) and extraction needs (e.g., simple splitting or context-sensitive processing). + +#### **3. Strategy 1: Regex-Based Chunking** + - **Description**: Uses regular expressions to split text based on specified patterns (e.g., paragraphs or section breaks). + - **Use Case**: Ideal for dividing text by paragraphs or larger logical blocks where sections are clearly separated by line breaks or punctuation. + - **Example**: + - **Pattern**: `r'\n\n'` for double line breaks. + ```python + chunker = RegexChunking(patterns=[r'\n\n']) + text_chunks = chunker.chunk(long_text) + print(text_chunks) # Output: List of paragraphs + ``` + - **Pros**: Flexible for pattern-based chunking. + - **Cons**: Limited to text with consistent formatting. + +#### **4. Strategy 2: NLP Sentence-Based Chunking** + - **Description**: Uses NLP to split text by sentences, ensuring grammatically complete segments. + - **Use Case**: Useful for extracting individual statements, such as in news articles, quotes, or legal text. 
+ - **Example**: + ```python + chunker = NlpSentenceChunking() + sentence_chunks = chunker.chunk(long_text) + print(sentence_chunks) # Output: List of sentences + ``` + - **Pros**: Maintains sentence structure, ideal for tasks needing semantic completeness. + - **Cons**: May create very small chunks, which could limit contextual extraction. + +#### **5. Strategy 3: Topic-Based Segmentation Using TextTiling** + - **Description**: Segments text into topics using TextTiling, identifying topic shifts and key segments. + - **Use Case**: Ideal for long articles, reports, or essays where each section covers a different topic. + - **Example**: + ```python + chunker = TopicSegmentationChunking(num_keywords=3) + topic_chunks = chunker.chunk_with_topics(long_text) + print(topic_chunks) # Output: List of topic segments with keywords + ``` + - **Pros**: Groups related content, preserving topical coherence. + - **Cons**: Depends on identifiable topic shifts, which may not be present in all texts. + +#### **6. Strategy 4: Fixed-Length Word Chunking** + - **Description**: Splits text into chunks based on a fixed number of words. + - **Use Case**: Ideal for text where exact segment size is required, such as processing word-limited documents for LLMs. + - **Example**: + ```python + chunker = FixedLengthWordChunking(chunk_size=100) + word_chunks = chunker.chunk(long_text) + print(word_chunks) # Output: List of 100-word chunks + ``` + - **Pros**: Ensures uniform chunk sizes, suitable for token-based extraction limits. + - **Cons**: May split sentences, affecting semantic coherence. + +#### **7. Strategy 5: Sliding Window Chunking** + - **Description**: Uses a fixed window size with a step, creating overlapping chunks to maintain context. + - **Use Case**: Useful for maintaining context across sections, as with documents where context is needed for neighboring sections. + - **Example**: + ```python + chunker = SlidingWindowChunking(window_size=100, step=50) + window_chunks = chunker.chunk(long_text) + print(window_chunks) # Output: List of overlapping word chunks + ``` + - **Pros**: Retains context across adjacent chunks, ideal for complex semantic extraction. + - **Cons**: Overlap increases data size, potentially impacting processing time. + +#### **8. Strategy 6: Overlapping Window Chunking** + - **Description**: Similar to sliding windows but with a defined overlap, allowing chunks to share content at the edges. + - **Use Case**: Suitable for handling long texts with essential overlapping information, like research articles or medical records. + - **Example**: + ```python + chunker = OverlappingWindowChunking(window_size=1000, overlap=100) + overlap_chunks = chunker.chunk(long_text) + print(overlap_chunks) # Output: List of overlapping chunks with defined overlap + ``` + - **Pros**: Allows controlled overlap for consistent content coverage across chunks. + - **Cons**: Redundant data in overlapping areas may increase computation. + +#### **9. Practical Example: Using Chunking with an Extraction Strategy** + - **Goal**: Combine chunking with an extraction strategy to process large text effectively. 
+   - **Example Code**:
+     ```python
+     from crawl4ai import AsyncWebCrawler
+     from crawl4ai.chunking_strategy import FixedLengthWordChunking
+     from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+     async def extract_large_text(large_text):
+         # Initialize chunker and extraction strategy
+         chunker = FixedLengthWordChunking(chunk_size=200)
+         extraction_strategy = LLMExtractionStrategy(provider="openai/gpt-4", api_token="your_api_token")
+
+         # Split text into chunks
+         text_chunks = chunker.chunk(large_text)
+
+         async with AsyncWebCrawler() as crawler:
+             for chunk in text_chunks:
+                 # Pass each chunk to the crawler as raw content via the "raw:" prefix
+                 result = await crawler.arun(
+                     url=f"raw:{chunk}",
+                     extraction_strategy=extraction_strategy
+                 )
+                 print(result.extracted_content)
+     ```
+
+   - **Explanation**:
+     - `chunker.chunk()`: Divides the `large_text` into smaller segments based on the chosen strategy.
+     - `url=f"raw:{chunk}"`: Feeds each chunk to the crawler as raw content instead of fetching a live URL.
+     - `extraction_strategy`: Processes each chunk separately, and results are then aggregated to form the final output.
+
+#### **10. Choosing the Right Chunking Strategy**
+   - **Text Structure**: If text has clear sections (e.g., paragraphs, topics), use Regex or Topic Segmentation.
+   - **Extraction Needs**: If context is crucial, consider Sliding or Overlapping Window Chunking.
+   - **Processing Constraints**: For word-limited extractions (e.g., LLMs with token limits), Fixed-Length Word Chunking is often most effective.
+
+#### **11. Wrap Up & Next Steps**
+   - Recap the benefits of each chunking strategy and when to use them in extraction workflows.
+   - Tease the next video: **Hooks and Custom Workflow with AsyncWebCrawler**, focusing on customizing crawler behavior with hooks for a fine-tuned extraction process.
+
+---
+
+This outline provides a complete understanding of chunking strategies, explaining each method’s strengths and best-use scenarios to help users process large texts effectively in Crawl4AI.
+
+# Crawl4AI
+
+## Episode 14: Hooks and Custom Workflow with AsyncWebCrawler
+
+### Quick Intro
+Cover hooks (`on_browser_created`, `before_goto`, `after_goto`) to add custom workflows. Demo: Use hooks to add custom cookies or headers, log HTML, or trigger specific events on page load.
+
+Here’s a detailed outline for the **Hooks and Custom Workflow with AsyncWebCrawler** video, covering each hook’s purpose, usage, and example implementations.
+
+---
+
+### **13. Hooks and Custom Workflow with AsyncWebCrawler**
+
+#### **1. Introduction to Hooks in Crawl4AI**
+   - **What are Hooks**: Hooks are customizable entry points in the crawling process that allow users to inject custom actions or logic at specific stages.
+   - **Why Use Hooks**:
+     - They enable fine-grained control over the crawling workflow.
+     - Useful for performing additional tasks (e.g., logging, modifying headers) dynamically during the crawl.
+     - Hooks provide the flexibility to adapt the crawler to complex site structures or unique project needs.
+
+#### **2. Overview of Available Hooks**
+   - Crawl4AI offers seven key hooks to modify and control different stages in the crawling lifecycle:
+     - `on_browser_created`
+     - `on_user_agent_updated`
+     - `on_execution_started`
+     - `before_goto`
+     - `after_goto`
+     - `before_return_html`
+     - `before_retrieve_html`
+
+#### **3. Hook-by-Hook Explanation and Examples**
+
+---
+
+##### **Hook 1: `on_browser_created`**
+   - **Purpose**: Triggered right after the browser instance is created.
+   - **Use Case**:
+     - Initializing browser-specific settings or performing setup actions.
+     - Configuring browser extensions or scripts before any page is opened.
+ - **Example**: + ```python + async def log_browser_creation(browser): + print("Browser instance created:", browser) + + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + ``` + - **Explanation**: This hook logs the browser creation event, useful for tracking when a new browser instance starts. + +--- + +##### **Hook 2: `on_user_agent_updated`** + - **Purpose**: Called whenever the user agent string is updated. + - **Use Case**: + - Modifying the user agent based on page requirements, e.g., changing to a mobile user agent for mobile-only pages. + - **Example**: + ```python + def update_user_agent(user_agent): + print(f"User Agent Updated: {user_agent}") + + crawler.crawler_strategy.set_hook('on_user_agent_updated', update_user_agent) + crawler.update_user_agent("Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X)") + ``` + - **Explanation**: This hook provides a callback every time the user agent changes, helpful for debugging or dynamically altering user agent settings based on conditions. + +--- + +##### **Hook 3: `on_execution_started`** + - **Purpose**: Called right before the crawler begins any interaction (e.g., JavaScript execution, clicks). + - **Use Case**: + - Performing setup actions, such as inserting cookies or initiating custom scripts. + - **Example**: + ```python + async def log_execution_start(page): + print("Execution started on page:", page.url) + + crawler.crawler_strategy.set_hook('on_execution_started', log_execution_start) + ``` + - **Explanation**: Logs the start of any major interaction on the page, ideal for cases where you want to monitor each interaction. + +--- + +##### **Hook 4: `before_goto`** + - **Purpose**: Triggered before navigating to a new URL with `page.goto()`. + - **Use Case**: + - Modifying request headers or setting up conditions right before the page loads. + - Adding headers or dynamically adjusting options for specific URLs. + - **Example**: + ```python + async def modify_headers_before_goto(page): + await page.set_extra_http_headers({"X-Custom-Header": "CustomValue"}) + print("Custom headers set before navigation") + + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + ``` + - **Explanation**: This hook allows injecting headers or altering settings based on the page’s needs, particularly useful for pages with custom requirements. + +--- + +##### **Hook 5: `after_goto`** + - **Purpose**: Executed immediately after a page has loaded (after `page.goto()`). + - **Use Case**: + - Checking the loaded page state, modifying the DOM, or performing post-navigation actions (e.g., scrolling). + - **Example**: + ```python + async def post_navigation_scroll(page): + await page.evaluate("window.scrollTo(0, document.body.scrollHeight)") + print("Scrolled to the bottom after navigation") + + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + ``` + - **Explanation**: This hook scrolls to the bottom of the page after loading, which can help load dynamically added content like infinite scroll elements. + +--- + +##### **Hook 6: `before_return_html`** + - **Purpose**: Called right before HTML content is retrieved and returned. + - **Use Case**: + - Removing overlays or cleaning up the page for a cleaner HTML extraction. 
+ - **Example**: + ```python + async def remove_advertisements(page, html): + await page.evaluate("document.querySelectorAll('.ad-banner').forEach(el => el.remove());") + print("Advertisements removed before returning HTML") + + crawler.crawler_strategy.set_hook('before_return_html', remove_advertisements) + ``` + - **Explanation**: The hook removes ad banners from the HTML before it’s retrieved, ensuring a cleaner data extraction. + +--- + +##### **Hook 7: `before_retrieve_html`** + - **Purpose**: Runs right before Crawl4AI initiates HTML retrieval. + - **Use Case**: + - Finalizing any page adjustments (e.g., setting timers, waiting for specific elements). + - **Example**: + ```python + async def wait_for_content_before_retrieve(page): + await page.wait_for_selector('.main-content') + print("Main content loaded, ready to retrieve HTML") + + crawler.crawler_strategy.set_hook('before_retrieve_html', wait_for_content_before_retrieve) + ``` + - **Explanation**: This hook waits for the main content to load before retrieving the HTML, ensuring that all essential content is captured. + +#### **4. Setting Hooks in Crawl4AI** + - **How to Set Hooks**: + - Use `set_hook` to define a custom function for each hook. + - Each hook function can be asynchronous (useful for actions like waiting or retrieving async data). + - **Example Setup**: + ```python + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + ``` + +#### **5. Complete Example: Using Hooks for a Customized Crawl Workflow** + - **Goal**: Log each key step, set custom headers before navigation, and clean up the page before retrieving HTML. + - **Example Code**: + ```python + async def custom_crawl(): + async with AsyncWebCrawler() as crawler: + # Set hooks for custom workflow + crawler.crawler_strategy.set_hook('on_browser_created', log_browser_creation) + crawler.crawler_strategy.set_hook('before_goto', modify_headers_before_goto) + crawler.crawler_strategy.set_hook('after_goto', post_navigation_scroll) + crawler.crawler_strategy.set_hook('before_return_html', remove_advertisements) + + # Perform the crawl + url = "https://example.com" + result = await crawler.arun(url=url) + print(result.html) # Display or process HTML + ``` + +#### **6. Benefits of Using Hooks in Custom Crawling Workflows** + - **Enhanced Control**: Hooks offer precise control over each stage, allowing adjustments based on content and structure. + - **Efficient Modifications**: Avoid reloading or restarting the session; hooks can alter actions dynamically. + - **Context-Sensitive Actions**: Hooks enable custom logic tailored to specific pages or sections, maximizing extraction quality. + +#### **7. Wrap Up & Next Steps** + - Recap how hooks empower customized workflows in Crawl4AI, enabling flexibility at every stage. + - Tease the next video: **Automating Post-Processing with Crawl4AI**, covering automated steps after data extraction. + +--- + +This outline provides a thorough understanding of hooks, their practical applications, and examples for customizing the crawling workflow in Crawl4AI. 
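+---
+
+As a compact follow-up to the complete example above, here is a minimal sketch of the "custom cookies or headers" demo promised in the Quick Intro. It reuses the hook registration pattern and hook signatures shown earlier in this outline; the cookie name and value, the custom header name, and the target URL are placeholders, not values defined by the library.
+
+```python
+import asyncio
+from crawl4ai import AsyncWebCrawler
+
+# Placeholder cookie for illustration only
+AUTH_COOKIE = {"name": "sessionid", "value": "your-session-id", "url": "https://example.com"}
+
+async def inject_identity(page):
+    # Registered as the 'before_goto' hook: add a cookie and a custom header before navigation
+    await page.context.add_cookies([AUTH_COOKIE])
+    await page.set_extra_http_headers({"X-Crawl4AI-Demo": "1"})
+
+async def log_html_size(page, html):
+    # Registered as the 'before_return_html' hook: log how much HTML is about to be returned
+    print(f"About to return {len(html)} characters of HTML from {page.url}")
+
+async def main():
+    async with AsyncWebCrawler(verbose=True) as crawler:
+        crawler.crawler_strategy.set_hook('before_goto', inject_identity)
+        crawler.crawler_strategy.set_hook('before_return_html', log_html_size)
+        result = await crawler.arun(url="https://example.com")
+        print(result.markdown[:300])
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+The same pattern extends to the other hooks listed in section 2; each hook function simply accepts the arguments shown in the corresponding example above.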
\ No newline at end of file diff --git a/docs/notebooks/Crawl4AI_v0.3.72_Release_Announcement.ipynb b/docs/notebooks/Crawl4AI_v0.3.72_Release_Announcement.ipynb new file mode 100644 index 0000000..053bc6c --- /dev/null +++ b/docs/notebooks/Crawl4AI_v0.3.72_Release_Announcement.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 🚀 Crawl4AI v0.3.72 Release Announcement\n", + "\n", + "Welcome to the new release of **Crawl4AI v0.3.72**! This notebook highlights the latest features and demonstrates how they work in real-time. Follow along to see each feature in action!\n", + "\n", + "### What’s New?\n", + "- ✨ `Fit Markdown`: Extracts only the main content from articles and blogs\n", + "- 🛡️ **Magic Mode**: Comprehensive anti-bot detection bypass\n", + "- 🌐 **Multi-browser support**: Switch between Chromium, Firefox, WebKit\n", + "- 🔍 **Knowledge Graph Extraction**: Generate structured graphs of entities & relationships from any URL\n", + "- 🤖 **Crawl4AI GPT Assistant**: Chat directly with our AI assistant for help, code generation, and faster learning (available [here](https://tinyurl.com/your-gpt-assistant-link))\n", + "\n", + "---\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 📥 Setup\n", + "To start, we'll install `Crawl4AI` along with Playwright and `nest_asyncio` to ensure compatibility with Colab’s asynchronous environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Install Crawl4AI and dependencies\n", + "!pip install crawl4ai\n", + "!playwright install\n", + "!pip install nest_asyncio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Import nest_asyncio and apply it to allow asyncio in Colab\n", + "import nest_asyncio\n", + "nest_asyncio.apply()\n", + "\n", + "print('Setup complete!')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ✨ Feature 1: `Fit Markdown`\n", + "Extracts only the main content from articles and blog pages, removing sidebars, ads, and other distractions.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import asyncio\n", + "from crawl4ai import AsyncWebCrawler\n", + "\n", + "async def fit_markdown_demo():\n", + " async with AsyncWebCrawler() as crawler:\n", + " result = await crawler.arun(url=\"https://janineintheworld.com/places-to-visit-in-central-mexico\")\n", + " print(result.fit_markdown) # Shows main content in Markdown format\n", + "\n", + "# Run the demo\n", + "await fit_markdown_demo()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## 🛡️ Feature 2: Magic Mode\n", + "Magic Mode bypasses anti-bot detection to make crawling more reliable on protected websites.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def magic_mode_demo():\n", + " async with AsyncWebCrawler() as crawler: # Enables anti-bot detection bypass\n", + " result = await crawler.arun(\n", + " url=\"https://www.reuters.com/markets/us/global-markets-view-usa-pix-2024-08-29/\",\n", + " magic=True # Enables magic mode\n", + " )\n", + " print(result.markdown) # Shows the full content in Markdown format\n", + "\n", + "# Run the demo\n", + "await magic_mode_demo()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ 
+ "---\n", + "\n", + "## 🌐 Feature 3: Multi-Browser Support\n", + "Crawl4AI now supports Chromium, Firefox, and WebKit. Here’s how to specify Firefox for a crawl.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def multi_browser_demo():\n", + " async with AsyncWebCrawler(browser_type=\"firefox\") as crawler: # Using Firefox instead of default Chromium\n", + " result = await crawler.arun(url=\"https://crawl4i.com\")\n", + " print(result.markdown) # Shows content extracted using Firefox\n", + "\n", + "# Run the demo\n", + "await multi_browser_demo()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## ✨ Put them all together\n", + "\n", + "Let's combine all the features to extract the main content from a blog post, bypass anti-bot detection, and generate a knowledge graph from the extracted content." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from crawl4ai.extraction_strategy import LLMExtractionStrategy\n", + "from pydantic import BaseModel\n", + "import json, os\n", + "from typing import List\n", + "\n", + "# Define classes for the knowledge graph structure\n", + "class Landmark(BaseModel):\n", + " name: str\n", + " description: str\n", + " activities: list[str] # E.g., visiting, sightseeing, relaxing\n", + "\n", + "class City(BaseModel):\n", + " name: str\n", + " description: str\n", + " landmarks: list[Landmark]\n", + " cultural_highlights: list[str] # E.g., food, music, traditional crafts\n", + "\n", + "class TravelKnowledgeGraph(BaseModel):\n", + " cities: list[City] # Central Mexican cities to visit\n", + "\n", + "async def combined_demo():\n", + " # Define the knowledge graph extraction strategy\n", + " strategy = LLMExtractionStrategy(\n", + " # provider=\"ollama/nemotron\",\n", + " provider='openai/gpt-4o-mini', # Or any other provider, including Ollama and open source models\n", + " pi_token=os.getenv('OPENAI_API_KEY'), # In case of Ollama just pass \"no-token\"\n", + " schema=TravelKnowledgeGraph.schema(),\n", + " instruction=(\n", + " \"Extract cities, landmarks, and cultural highlights for places to visit in Central Mexico. \"\n", + " \"For each city, list main landmarks with descriptions and activities, as well as cultural highlights.\"\n", + " )\n", + " )\n", + "\n", + " # Set up the AsyncWebCrawler with multi-browser support, Magic Mode, and Fit Markdown\n", + " async with AsyncWebCrawler(browser_type=\"firefox\") as crawler:\n", + " result = await crawler.arun(\n", + " url=\"https://janineintheworld.com/places-to-visit-in-central-mexico\",\n", + " extraction_strategy=strategy,\n", + " bypass_cache=True,\n", + " magic=True\n", + " )\n", + " \n", + " # Display main article content in Fit Markdown format\n", + " print(\"Extracted Main Content:\\n\", result.fit_markdown)\n", + " \n", + " # Display extracted knowledge graph of cities, landmarks, and cultural highlights\n", + " if result.extracted_content:\n", + " travel_graph = json.loads(result.extracted_content)\n", + " print(\"\\nExtracted Knowledge Graph:\\n\", json.dumps(travel_graph, indent=2))\n", + "\n", + "# Run the combined demo\n", + "await combined_demo()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "---\n", + "\n", + "## 🤖 Crawl4AI GPT Assistant\n", + "Chat with the Crawl4AI GPT Assistant for code generation, support, and learning Crawl4AI faster. 
Try it out [here](https://tinyurl.com/crawl4ai-gpt)!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "name": "python", + "version": "3.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/scrapper/async_web_scraper.md b/docs/scraper/async_web_scraper.md similarity index 100% rename from docs/scrapper/async_web_scraper.md rename to docs/scraper/async_web_scraper.md diff --git a/docs/scrapper/bfs_scraper_strategy.md b/docs/scraper/bfs_scraper_strategy.md similarity index 100% rename from docs/scrapper/bfs_scraper_strategy.md rename to docs/scraper/bfs_scraper_strategy.md diff --git a/docs/scrapper/filters_scrorers.md b/docs/scraper/filters_scrorers.md similarity index 100% rename from docs/scrapper/filters_scrorers.md rename to docs/scraper/filters_scrorers.md diff --git a/docs/scrapper/how_to_use.md b/docs/scraper/how_to_use.md similarity index 100% rename from docs/scrapper/how_to_use.md rename to docs/scraper/how_to_use.md diff --git a/docs/scrapper/scraper_quickstart.py b/docs/scraper/scraper_quickstart.py similarity index 52% rename from docs/scrapper/scraper_quickstart.py rename to docs/scraper/scraper_quickstart.py index a2c7a23..9e380cf 100644 --- a/docs/scrapper/scraper_quickstart.py +++ b/docs/scraper/scraper_quickstart.py @@ -7,6 +7,7 @@ ContentTypeFilter ) from crawl4ai.async_webcrawler import AsyncWebCrawler +import re async def basic_scraper_example(): """ @@ -18,7 +19,7 @@ async def basic_scraper_example(): # Create a simple filter chain filter_chain = FilterChain([ # Only crawl pages within the blog section - URLPatternFilter("*/blog/*"), + URLPatternFilter("*/tutorial/*"), # Only process HTML pages ContentTypeFilter(["text/html"]) ]) @@ -28,24 +29,24 @@ async def basic_scraper_example(): max_depth=2, # Only go 2 levels deep filter_chain=filter_chain, url_scorer=None, # Use default scoring - max_concurrent=3 # Limit concurrent requests + max_concurrent=3, # Limit concurrent requests + process_external_links=True ) # Create the crawler and scraper - crawler = AsyncWebCrawler() - scraper = AsyncWebScraper(crawler, strategy) - - # Start scraping - try: - result = await scraper.ascrape("https://example.com/blog/") - - # Process results - print(f"Crawled {len(result.crawled_urls)} pages:") - for url, data in result.extracted_data.items(): - print(f"- {url}: {len(data.html)} bytes") + async with AsyncWebCrawler(verbose=True) as crawler: + scraper = AsyncWebScraper(crawler, strategy) + # Start scraping + try: + result = await scraper.ascrape("https://crawl4ai.com/mkdocs") + + # Process results + print(f"Crawled {len(result.crawled_urls)} pages:") + for url, data in result.extracted_data.items(): + print(f"- {url}: {len(data.html)} bytes") - except Exception as e: - print(f"Error during scraping: {e}") + except Exception as e: + print(f"Error during scraping: {e}") # advanced_scraper_example.py import logging @@ -79,8 +80,8 @@ async def advanced_scraper_example(): filter_chain = FilterChain([ # Domain control DomainFilter( - allowed_domains=["example.com", "blog.example.com"], - blocked_domains=["ads.example.com", "tracker.example.com"] + allowed_domains=["techcrunch.com"], + blocked_domains=["login.techcrunch.com","legal.yahoo.com"] ), # URL patterns URLPatternFilter([ @@ -114,7 +115,7 @@ async def advanced_scraper_example(): # Initialize strategy with advanced configuration strategy = 
BFSScraperStrategy( - max_depth=4, + max_depth=2, filter_chain=filter_chain, url_scorer=scorer, max_concurrent=5, @@ -122,56 +123,53 @@ async def advanced_scraper_example(): ) # Create crawler and scraper - crawler = AsyncWebCrawler() - scraper = AsyncWebScraper(crawler, strategy) + async with AsyncWebCrawler(verbose=True) as crawler: + scraper = AsyncWebScraper(crawler, strategy) - # Track statistics - stats = { - 'processed': 0, - 'errors': 0, - 'total_size': 0 - } + # Track statistics + stats = { + 'processed': 0, + 'errors': 0, + 'total_size': 0 + } - try: - # Use streaming mode - async for result in scraper.ascrape("https://example.com/news/", stream=True): - stats['processed'] += 1 - - if result.success: - stats['total_size'] += len(result.html) - logger.info(f"Processed: {result.url}") + try: + # Use streaming mode + result_generator = await scraper.ascrape("https://techcrunch.com", parallel_processing=True, stream=True) + async for result in result_generator: + stats['processed'] += 1 - # Print scoring information - for scorer_name, score in result.scores.items(): - logger.debug(f"{scorer_name}: {score:.2f}") - else: - stats['errors'] += 1 - logger.error(f"Failed to process {result.url}: {result.error_message}") + if result.success: + stats['total_size'] += len(result.html) + logger.info(f"Processed: {result.url}") + else: + stats['errors'] += 1 + logger.error(f"Failed to process {result.url}: {result.error_message}") - # Log progress regularly - if stats['processed'] % 10 == 0: - logger.info(f"Progress: {stats['processed']} URLs processed") + # Log progress regularly + if stats['processed'] % 10 == 0: + logger.info(f"Progress: {stats['processed']} URLs processed") - except Exception as e: - logger.error(f"Scraping error: {e}") - - finally: - # Print final statistics - logger.info("Scraping completed:") - logger.info(f"- URLs processed: {stats['processed']}") - logger.info(f"- Errors: {stats['errors']}") - logger.info(f"- Total content size: {stats['total_size'] / 1024:.2f} KB") + except Exception as e: + logger.error(f"Scraping error: {e}") - # Print filter statistics - for filter_ in filter_chain.filters: - logger.info(f"{filter_.name} stats:") - logger.info(f"- Passed: {filter_.stats.passed_urls}") - logger.info(f"- Rejected: {filter_.stats.rejected_urls}") - - # Print scorer statistics - logger.info("Scoring statistics:") - logger.info(f"- Average score: {scorer.stats.average_score:.2f}") - logger.info(f"- Score range: {scorer.stats.min_score:.2f} - {scorer.stats.max_score:.2f}") + finally: + # Print final statistics + logger.info("Scraping completed:") + logger.info(f"- URLs processed: {stats['processed']}") + logger.info(f"- Errors: {stats['errors']}") + logger.info(f"- Total content size: {stats['total_size'] / 1024:.2f} KB") + + # Print filter statistics + for filter_ in filter_chain.filters: + logger.info(f"{filter_.name} stats:") + logger.info(f"- Passed: {filter_.stats.passed_urls}") + logger.info(f"- Rejected: {filter_.stats.rejected_urls}") + + # Print scorer statistics + logger.info("Scoring statistics:") + logger.info(f"- Average score: {scorer.stats.average_score:.2f}") + logger.info(f"- Score range: {scorer.stats.min_score:.2f} - {scorer.stats.max_score:.2f}") if __name__ == "__main__": import asyncio @@ -179,6 +177,7 @@ async def advanced_scraper_example(): # Run basic example print("Running basic scraper example...") asyncio.run(basic_scraper_example()) - + + # Run advanced example print("\nRunning advanced scraper example...") 
asyncio.run(advanced_scraper_example()) \ No newline at end of file diff --git a/main.py b/main.py index 71d5eee..6d21741 100644 --- a/main.py +++ b/main.py @@ -1,10 +1,6 @@ -import os -import importlib -import asyncio -from functools import lru_cache -import logging -logging.basicConfig(level=logging.DEBUG) - +import asyncio, os +from fastapi import FastAPI, HTTPException, BackgroundTasks, Request +from fastapi.responses import JSONResponse from fastapi import FastAPI, HTTPException, Request from fastapi.responses import HTMLResponse, JSONResponse from fastapi.staticfiles import StaticFiles @@ -14,102 +10,325 @@ from starlette.middleware.base import BaseHTTPMiddleware from starlette.responses import FileResponse from fastapi.responses import RedirectResponse +from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials +from fastapi import Depends, Security -from pydantic import BaseModel, HttpUrl -from concurrent.futures import ThreadPoolExecutor, as_completed -from typing import List, Optional +from pydantic import BaseModel, HttpUrl, Field +from typing import Optional, List, Dict, Any, Union +import psutil +import time +import uuid +from collections import defaultdict +from urllib.parse import urlparse +import math +import logging +from enum import Enum +from dataclasses import dataclass +import json +from crawl4ai import AsyncWebCrawler, CrawlResult, CacheMode +from crawl4ai.config import MIN_WORD_THRESHOLD +from crawl4ai.extraction_strategy import ( + LLMExtractionStrategy, + CosineStrategy, + JsonCssExtractionStrategy, +) -from crawl4ai.web_crawler import WebCrawler -from crawl4ai.database import get_total_count, clear_db +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) -import time -from slowapi import Limiter, _rate_limit_exceeded_handler -from slowapi.util import get_remote_address -from slowapi.errors import RateLimitExceeded +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) -# load .env file -from dotenv import load_dotenv -load_dotenv() +class TaskStatus(str, Enum): + PENDING = "pending" + PROCESSING = "processing" + COMPLETED = "completed" + FAILED = "failed" -# Configuration -__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) -MAX_CONCURRENT_REQUESTS = 10 # Adjust this to change the maximum concurrent requests -current_requests = 0 -lock = asyncio.Lock() +class CrawlerType(str, Enum): + BASIC = "basic" + LLM = "llm" + COSINE = "cosine" + JSON_CSS = "json_css" -app = FastAPI() +class ExtractionConfig(BaseModel): + type: CrawlerType + params: Dict[str, Any] = {} -# Initialize rate limiter -def rate_limit_key_func(request: Request): - access_token = request.headers.get("access-token") - if access_token == os.environ.get('ACCESS_TOKEN'): - return None - return get_remote_address(request) - -limiter = Limiter(key_func=rate_limit_key_func) -app.state.limiter = limiter - -# Dictionary to store last request times for each client -last_request_times = {} -last_rate_limit = {} - - -def get_rate_limit(): - limit = os.environ.get('ACCESS_PER_MIN', "5") - return f"{limit}/minute" - -# Custom rate limit exceeded handler -async def custom_rate_limit_exceeded_handler(request: Request, exc: RateLimitExceeded) -> JSONResponse: - if request.client.host not in last_rate_limit or time.time() - last_rate_limit[request.client.host] > 60: - last_rate_limit[request.client.host] = time.time() - retry_after = 60 - (time.time() - last_rate_limit[request.client.host]) - reset_at = time.time() + 
retry_after - return JSONResponse( - status_code=429, - content={ - "detail": "Rate limit exceeded", - "limit": str(exc.limit.limit), - "retry_after": retry_after, - 'reset_at': reset_at, - "message": f"You have exceeded the rate limit of {exc.limit.limit}." - } - ) - -app.add_exception_handler(RateLimitExceeded, custom_rate_limit_exceeded_handler) +class ChunkingStrategy(BaseModel): + type: str + params: Dict[str, Any] = {} +class ContentFilter(BaseModel): + type: str = "bm25" + params: Dict[str, Any] = {} -# Middleware for token-based bypass and per-request limit -class RateLimitMiddleware(BaseHTTPMiddleware): - async def dispatch(self, request: Request, call_next): - SPAN = int(os.environ.get('ACCESS_TIME_SPAN', 10)) - access_token = request.headers.get("access-token") - if access_token == os.environ.get('ACCESS_TOKEN'): - return await call_next(request) - - path = request.url.path - if path in ["/crawl", "/old"]: - client_ip = request.client.host +class CrawlRequest(BaseModel): + urls: Union[HttpUrl, List[HttpUrl]] + word_count_threshold: int = MIN_WORD_THRESHOLD + extraction_config: Optional[ExtractionConfig] = None + chunking_strategy: Optional[ChunkingStrategy] = None + content_filter: Optional[ContentFilter] = None + js_code: Optional[List[str]] = None + wait_for: Optional[str] = None + css_selector: Optional[str] = None + screenshot: bool = False + magic: bool = False + extra: Optional[Dict[str, Any]] = {} + session_id: Optional[str] = None + cache_mode: Optional[CacheMode] = CacheMode.ENABLED + priority: int = Field(default=5, ge=1, le=10) + ttl: Optional[int] = 3600 + crawler_params: Dict[str, Any] = {} + +@dataclass +class TaskInfo: + id: str + status: TaskStatus + result: Optional[Union[CrawlResult, List[CrawlResult]]] = None + error: Optional[str] = None + created_at: float = time.time() + ttl: int = 3600 + +class ResourceMonitor: + def __init__(self, max_concurrent_tasks: int = 10): + self.max_concurrent_tasks = max_concurrent_tasks + self.memory_threshold = 0.85 + self.cpu_threshold = 0.90 + self._last_check = 0 + self._check_interval = 1 # seconds + self._last_available_slots = max_concurrent_tasks + + async def get_available_slots(self) -> int: + current_time = time.time() + if current_time - self._last_check < self._check_interval: + return self._last_available_slots + + mem_usage = psutil.virtual_memory().percent / 100 + cpu_usage = psutil.cpu_percent() / 100 + + memory_factor = max(0, (self.memory_threshold - mem_usage) / self.memory_threshold) + cpu_factor = max(0, (self.cpu_threshold - cpu_usage) / self.cpu_threshold) + + self._last_available_slots = math.floor( + self.max_concurrent_tasks * min(memory_factor, cpu_factor) + ) + self._last_check = current_time + + return self._last_available_slots + +class TaskManager: + def __init__(self, cleanup_interval: int = 300): + self.tasks: Dict[str, TaskInfo] = {} + self.high_priority = asyncio.PriorityQueue() + self.low_priority = asyncio.PriorityQueue() + self.cleanup_interval = cleanup_interval + self.cleanup_task = None + + async def start(self): + self.cleanup_task = asyncio.create_task(self._cleanup_loop()) + + async def stop(self): + if self.cleanup_task: + self.cleanup_task.cancel() + try: + await self.cleanup_task + except asyncio.CancelledError: + pass + + async def add_task(self, task_id: str, priority: int, ttl: int) -> None: + task_info = TaskInfo(id=task_id, status=TaskStatus.PENDING, ttl=ttl) + self.tasks[task_id] = task_info + queue = self.high_priority if priority > 5 else self.low_priority + await 
queue.put((-priority, task_id)) # Negative for proper priority ordering + + async def get_next_task(self) -> Optional[str]: + try: + # Try high priority first + _, task_id = await asyncio.wait_for(self.high_priority.get(), timeout=0.1) + return task_id + except asyncio.TimeoutError: + try: + # Then try low priority + _, task_id = await asyncio.wait_for(self.low_priority.get(), timeout=0.1) + return task_id + except asyncio.TimeoutError: + return None + + def update_task(self, task_id: str, status: TaskStatus, result: Any = None, error: str = None): + if task_id in self.tasks: + task_info = self.tasks[task_id] + task_info.status = status + task_info.result = result + task_info.error = error + + def get_task(self, task_id: str) -> Optional[TaskInfo]: + return self.tasks.get(task_id) + + async def _cleanup_loop(self): + while True: + try: + await asyncio.sleep(self.cleanup_interval) + current_time = time.time() + expired_tasks = [ + task_id + for task_id, task in self.tasks.items() + if current_time - task.created_at > task.ttl + and task.status in [TaskStatus.COMPLETED, TaskStatus.FAILED] + ] + for task_id in expired_tasks: + del self.tasks[task_id] + except Exception as e: + logger.error(f"Error in cleanup loop: {e}") + +class CrawlerPool: + def __init__(self, max_size: int = 10): + self.max_size = max_size + self.active_crawlers: Dict[AsyncWebCrawler, float] = {} + self._lock = asyncio.Lock() + + async def acquire(self, **kwargs) -> AsyncWebCrawler: + async with self._lock: + # Clean up inactive crawlers current_time = time.time() - - # Check time since last request - if client_ip in last_request_times: - time_since_last_request = current_time - last_request_times[client_ip] - if time_since_last_request < SPAN: - return JSONResponse( - status_code=429, - content={ - "detail": "Too many requests", - "message": "Rate limit exceeded. 
Please wait 10 seconds between requests.", - "retry_after": max(0, SPAN - time_since_last_request), - "reset_at": current_time + max(0, SPAN - time_since_last_request), - } - ) - - last_request_times[client_ip] = current_time - - return await call_next(request) + inactive = [ + crawler + for crawler, last_used in self.active_crawlers.items() + if current_time - last_used > 600 # 10 minutes timeout + ] + for crawler in inactive: + await crawler.__aexit__(None, None, None) + del self.active_crawlers[crawler] + + # Create new crawler if needed + if len(self.active_crawlers) < self.max_size: + crawler = AsyncWebCrawler(**kwargs) + await crawler.__aenter__() + self.active_crawlers[crawler] = current_time + return crawler + + # Reuse least recently used crawler + crawler = min(self.active_crawlers.items(), key=lambda x: x[1])[0] + self.active_crawlers[crawler] = current_time + return crawler + + async def release(self, crawler: AsyncWebCrawler): + async with self._lock: + if crawler in self.active_crawlers: + self.active_crawlers[crawler] = time.time() + + async def cleanup(self): + async with self._lock: + for crawler in list(self.active_crawlers.keys()): + await crawler.__aexit__(None, None, None) + self.active_crawlers.clear() + +class CrawlerService: + def __init__(self, max_concurrent_tasks: int = 10): + self.resource_monitor = ResourceMonitor(max_concurrent_tasks) + self.task_manager = TaskManager() + self.crawler_pool = CrawlerPool(max_concurrent_tasks) + self._processing_task = None + + async def start(self): + await self.task_manager.start() + self._processing_task = asyncio.create_task(self._process_queue()) + + async def stop(self): + if self._processing_task: + self._processing_task.cancel() + try: + await self._processing_task + except asyncio.CancelledError: + pass + await self.task_manager.stop() + await self.crawler_pool.cleanup() + + def _create_extraction_strategy(self, config: ExtractionConfig): + if not config: + return None + + if config.type == CrawlerType.LLM: + return LLMExtractionStrategy(**config.params) + elif config.type == CrawlerType.COSINE: + return CosineStrategy(**config.params) + elif config.type == CrawlerType.JSON_CSS: + return JsonCssExtractionStrategy(**config.params) + return None -app.add_middleware(RateLimitMiddleware) + async def submit_task(self, request: CrawlRequest) -> str: + task_id = str(uuid.uuid4()) + await self.task_manager.add_task(task_id, request.priority, request.ttl or 3600) + + # Store request data with task + self.task_manager.tasks[task_id].request = request + + return task_id + + async def _process_queue(self): + while True: + try: + available_slots = await self.resource_monitor.get_available_slots() + if False and available_slots <= 0: + await asyncio.sleep(1) + continue + + task_id = await self.task_manager.get_next_task() + if not task_id: + await asyncio.sleep(1) + continue + + task_info = self.task_manager.get_task(task_id) + if not task_info: + continue + + request = task_info.request + self.task_manager.update_task(task_id, TaskStatus.PROCESSING) + + try: + crawler = await self.crawler_pool.acquire(**request.crawler_params) + + extraction_strategy = self._create_extraction_strategy(request.extraction_config) + + if isinstance(request.urls, list): + results = await crawler.arun_many( + urls=[str(url) for url in request.urls], + word_count_threshold=MIN_WORD_THRESHOLD, + extraction_strategy=extraction_strategy, + js_code=request.js_code, + wait_for=request.wait_for, + css_selector=request.css_selector, + 
screenshot=request.screenshot, + magic=request.magic, + session_id=request.session_id, + cache_mode=request.cache_mode, + **request.extra, + ) + else: + results = await crawler.arun( + url=str(request.urls), + extraction_strategy=extraction_strategy, + js_code=request.js_code, + wait_for=request.wait_for, + css_selector=request.css_selector, + screenshot=request.screenshot, + magic=request.magic, + session_id=request.session_id, + cache_mode=request.cache_mode, + **request.extra, + ) + + await self.crawler_pool.release(crawler) + self.task_manager.update_task(task_id, TaskStatus.COMPLETED, results) + + except Exception as e: + logger.error(f"Error processing task {task_id}: {str(e)}") + self.task_manager.update_task(task_id, TaskStatus.FAILED, error=str(e)) + + except Exception as e: + logger.error(f"Error in queue processing: {str(e)}") + await asyncio.sleep(1) + +app = FastAPI(title="Crawl4AI API") # CORS configuration origins = ["*"] # Allow all origins @@ -123,132 +342,151 @@ async def dispatch(self, request: Request, call_next): # Mount the pages directory as a static directory app.mount("/pages", StaticFiles(directory=__location__ + "/pages"), name="pages") -app.mount("/mkdocs", StaticFiles(directory="site", html=True), name="mkdocs") -site_templates = Jinja2Templates(directory=__location__ + "/site") -templates = Jinja2Templates(directory=__location__ + "/pages") -@lru_cache() -def get_crawler(): - # Initialize and return a WebCrawler instance - crawler = WebCrawler(verbose = True) - crawler.warmup() - return crawler +# API token security +security = HTTPBearer() +CRAWL4AI_API_TOKEN = os.getenv("CRAWL4AI_API_TOKEN") or "test_api_code" -class CrawlRequest(BaseModel): - urls: List[str] - include_raw_html: Optional[bool] = False - bypass_cache: bool = False - extract_blocks: bool = True - word_count_threshold: Optional[int] = 5 - extraction_strategy: Optional[str] = "NoExtractionStrategy" - extraction_strategy_args: Optional[dict] = {} - chunking_strategy: Optional[str] = "RegexChunking" - chunking_strategy_args: Optional[dict] = {} - css_selector: Optional[str] = None - screenshot: Optional[bool] = False - user_agent: Optional[str] = None - verbose: Optional[bool] = True +async def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)): + if not CRAWL4AI_API_TOKEN: + return credentials # No token verification if CRAWL4AI_API_TOKEN is not set + if credentials.credentials != CRAWL4AI_API_TOKEN: + raise HTTPException(status_code=401, detail="Invalid token") + return credentials -@app.get("/") -def read_root(): - return RedirectResponse(url="/mkdocs") +# Helper function to conditionally apply security +def secure_endpoint(): + return Depends(verify_token) if CRAWL4AI_API_TOKEN else None -@app.get("/old", response_class=HTMLResponse) -@limiter.limit(get_rate_limit()) -async def read_index(request: Request): - partials_dir = os.path.join(__location__, "pages", "partial") - partials = {} +# Check if site directory exists +if os.path.exists(__location__ + "/site"): + # Mount the site directory as a static directory + app.mount("/mkdocs", StaticFiles(directory="site", html=True), name="mkdocs") - for filename in os.listdir(partials_dir): - if filename.endswith(".html"): - with open(os.path.join(partials_dir, filename), "r", encoding="utf8") as file: - partials[filename[:-5]] = file.read() +site_templates = Jinja2Templates(directory=__location__ + "/site") +templates = Jinja2Templates(directory=__location__ + "/pages") - return templates.TemplateResponse("index.html", 
{"request": request, **partials}) +crawler_service = CrawlerService() -@app.get("/total-count") -async def get_total_url_count(): - count = get_total_count() - return JSONResponse(content={"count": count}) +@app.on_event("startup") +async def startup_event(): + await crawler_service.start() -@app.get("/clear-db") -async def clear_database(): - # clear_db() - return JSONResponse(content={"message": "Database cleared."}) +@app.on_event("shutdown") +async def shutdown_event(): + await crawler_service.stop() -def import_strategy(module_name: str, class_name: str, *args, **kwargs): - try: - module = importlib.import_module(module_name) - strategy_class = getattr(module, class_name) - return strategy_class(*args, **kwargs) - except ImportError: - print("ImportError: Module not found.") - raise HTTPException(status_code=400, detail=f"Module {module_name} not found.") - except AttributeError: - print("AttributeError: Class not found.") - raise HTTPException(status_code=400, detail=f"Class {class_name} not found in {module_name}.") - -@app.post("/crawl") -@limiter.limit(get_rate_limit()) -async def crawl_urls(crawl_request: CrawlRequest, request: Request): - logging.debug(f"[LOG] Crawl request for URL: {crawl_request.urls}") - global current_requests - async with lock: - if current_requests >= MAX_CONCURRENT_REQUESTS: - raise HTTPException(status_code=429, detail="Too many requests - please try again later.") - current_requests += 1 +@app.get("/") +def read_root(): + if os.path.exists(__location__ + "/site"): + return RedirectResponse(url="/mkdocs") + # Return a json response + return {"message": "Crawl4AI API service is running"} + + +@app.post("/crawl", dependencies=[Depends(verify_token)]) +async def crawl(request: CrawlRequest) -> Dict[str, str]: + task_id = await crawler_service.submit_task(request) + return {"task_id": task_id} + +@app.get("/task/{task_id}", dependencies=[Depends(verify_token)]) +async def get_task_status(task_id: str): + task_info = crawler_service.task_manager.get_task(task_id) + if not task_info: + raise HTTPException(status_code=404, detail="Task not found") + + response = { + "status": task_info.status, + "created_at": task_info.created_at, + } + + if task_info.status == TaskStatus.COMPLETED: + # Convert CrawlResult to dict for JSON response + if isinstance(task_info.result, list): + response["results"] = [result.dict() for result in task_info.result] + else: + response["result"] = task_info.result.dict() + elif task_info.status == TaskStatus.FAILED: + response["error"] = task_info.error + + return response + +@app.post("/crawl_sync", dependencies=[Depends(verify_token)]) +async def crawl_sync(request: CrawlRequest) -> Dict[str, Any]: + task_id = await crawler_service.submit_task(request) + + # Wait up to 60 seconds for task completion + for _ in range(60): + task_info = crawler_service.task_manager.get_task(task_id) + if not task_info: + raise HTTPException(status_code=404, detail="Task not found") + + if task_info.status == TaskStatus.COMPLETED: + # Return same format as /task/{task_id} endpoint + if isinstance(task_info.result, list): + return {"status": task_info.status, "results": [result.dict() for result in task_info.result]} + return {"status": task_info.status, "result": task_info.result.dict()} + + if task_info.status == TaskStatus.FAILED: + raise HTTPException(status_code=500, detail=task_info.error) + + await asyncio.sleep(1) + + # If we get here, task didn't complete within timeout + raise HTTPException(status_code=408, detail="Task timed out") 
+@app.post("/crawl_direct", dependencies=[Depends(verify_token)]) +async def crawl_direct(request: CrawlRequest) -> Dict[str, Any]: try: - logging.debug("[LOG] Loading extraction and chunking strategies...") - crawl_request.extraction_strategy_args['verbose'] = True - crawl_request.chunking_strategy_args['verbose'] = True + crawler = await crawler_service.crawler_pool.acquire(**request.crawler_params) + extraction_strategy = crawler_service._create_extraction_strategy(request.extraction_config) - extraction_strategy = import_strategy("crawl4ai.extraction_strategy", crawl_request.extraction_strategy, **crawl_request.extraction_strategy_args) - chunking_strategy = import_strategy("crawl4ai.chunking_strategy", crawl_request.chunking_strategy, **crawl_request.chunking_strategy_args) - - # Use ThreadPoolExecutor to run the synchronous WebCrawler in async manner - logging.debug("[LOG] Running the WebCrawler...") - with ThreadPoolExecutor() as executor: - loop = asyncio.get_event_loop() - futures = [ - loop.run_in_executor( - executor, - get_crawler().run, - str(url), - crawl_request.word_count_threshold, - extraction_strategy, - chunking_strategy, - crawl_request.bypass_cache, - crawl_request.css_selector, - crawl_request.screenshot, - crawl_request.user_agent, - crawl_request.verbose + try: + if isinstance(request.urls, list): + results = await crawler.arun_many( + urls=[str(url) for url in request.urls], + extraction_strategy=extraction_strategy, + js_code=request.js_code, + wait_for=request.wait_for, + css_selector=request.css_selector, + screenshot=request.screenshot, + magic=request.magic, + cache_mode=request.cache_mode, + session_id=request.session_id, + **request.extra, ) - for url in crawl_request.urls - ] - results = await asyncio.gather(*futures) - - # if include_raw_html is False, remove the raw HTML content from the results - if not crawl_request.include_raw_html: - for result in results: - result.html = None - - return {"results": [result.model_dump() for result in results]} - finally: - async with lock: - current_requests -= 1 - -@app.get("/strategies/extraction", response_class=JSONResponse) -async def get_extraction_strategies(): - with open(f"{__location__}/docs/extraction_strategies.json", "r") as file: - return JSONResponse(content=file.read()) - -@app.get("/strategies/chunking", response_class=JSONResponse) -async def get_chunking_strategies(): - with open(f"{__location__}/docs/chunking_strategies.json", "r") as file: - return JSONResponse(content=file.read()) - + return {"results": [result.dict() for result in results]} + else: + result = await crawler.arun( + url=str(request.urls), + extraction_strategy=extraction_strategy, + js_code=request.js_code, + wait_for=request.wait_for, + css_selector=request.css_selector, + screenshot=request.screenshot, + magic=request.magic, + cache_mode=request.cache_mode, + session_id=request.session_id, + **request.extra, + ) + return {"result": result.dict()} + finally: + await crawler_service.crawler_pool.release(crawler) + except Exception as e: + logger.error(f"Error in direct crawl: {str(e)}") + raise HTTPException(status_code=500, detail=str(e)) + +@app.get("/health") +async def health_check(): + available_slots = await crawler_service.resource_monitor.get_available_slots() + memory = psutil.virtual_memory() + return { + "status": "healthy", + "available_slots": available_slots, + "memory_usage": memory.percent, + "cpu_usage": psutil.cpu_percent(), + } if __name__ == "__main__": import uvicorn - uvicorn.run(app, host="0.0.0.0", 
port=8888) + uvicorn.run(app, host="0.0.0.0", port=11235) \ No newline at end of file diff --git a/middlewares.py b/middlewares.py deleted file mode 100644 index e69de29..0000000 diff --git a/mkdocs.yml b/mkdocs.yml index 66bc347..1b26b9d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,36 +1,81 @@ site_name: Crawl4AI Documentation -docs_dir: docs/md +site_description: 🔥🕷️ Crawl4AI, Open-source LLM Friendly Web Crawler & Scrapper +site_url: https://docs.crawl4ai.com +repo_url: https://github.com/unclecode/crawl4ai +repo_name: unclecode/crawl4ai +docs_dir: docs/md_v2 + nav: - - Home: index.md - - First Steps: - - Introduction: introduction.md - - Installation: installation.md - - Quick Start: quickstart.md - - Examples: - - Intro: examples/index.md - - Structured Data Extraction: examples/json_css_extraction.md - - LLM Extraction: examples/llm_extraction.md - - JS Execution & CSS Filtering: examples/js_execution_css_filtering.md - - Hooks & Auth: examples/hooks_auth.md - - Summarization: examples/summarization.md - - Research Assistant: examples/research_assistant.md - - Full Details of Using Crawler: - - Crawl Request Parameters: full_details/crawl_request_parameters.md - - Crawl Result Class: full_details/crawl_result_class.md - - Session Based Crawling: full_details/session_based_crawling.md - - Advanced Features: full_details/advanced_features.md - - Advanced JsonCssExtraction: full_details/advanced_jsoncss_extraction.md - - Chunking Strategies: full_details/chunking_strategies.md - - Extraction Strategies: full_details/extraction_strategies.md - - Miscellaneous: - - Change Log: changelog.md - - Contact: contact.md + - Home: 'index.md' + - 'Installation': 'basic/installation.md' + - 'Docker Deplotment': 'basic/docker-deploymeny.md' + - 'Quick Start': 'basic/quickstart.md' + + - Basic: + - 'Simple Crawling': 'basic/simple-crawling.md' + - 'Output Formats': 'basic/output-formats.md' + - 'Browser Configuration': 'basic/browser-config.md' + - 'Page Interaction': 'basic/page-interaction.md' + - 'Content Selection': 'basic/content-selection.md' + - 'Cache Modes': 'basic/cache-modes.md' + + - Advanced: + - 'Content Processing': 'advanced/content-processing.md' + - 'Magic Mode': 'advanced/magic-mode.md' + - 'Hooks & Auth': 'advanced/hooks-auth.md' + - 'Proxy & Security': 'advanced/proxy-security.md' + - 'Session Management': 'advanced/session-management.md' + - 'Session Management (Advanced)': 'advanced/session-management-advanced.md' + + - Extraction: + - 'Overview': 'extraction/overview.md' + - 'LLM Strategy': 'extraction/llm.md' + - 'Json-CSS Extractor Basic': 'extraction/css.md' + - 'Json-CSS Extractor Advanced': 'extraction/css-advanced.md' + - 'Cosine Strategy': 'extraction/cosine.md' + - 'Chunking': 'extraction/chunking.md' + + - API Reference: + - 'Parameters Table': 'api/parameters.md' + - 'AsyncWebCrawler': 'api/async-webcrawler.md' + - 'AsyncWebCrawler.arun()': 'api/arun.md' + - 'CrawlResult': 'api/crawl-result.md' + - 'Strategies': 'api/strategies.md' + + - Tutorial: + - '1. Getting Started': 'tutorial/episode_01_Introduction_to_Crawl4AI_and_Basic_Installation.md' + - '2. Advanced Features': 'tutorial/episode_02_Overview_of_Advanced_Features.md' + - '3. Browser Setup': 'tutorial/episode_03_Browser_Configurations_&_Headless_Crawling.md' + - '4. Proxy Settings': 'tutorial/episode_04_Advanced_Proxy_and_Security_Settings.md' + - '5. Dynamic Content': 'tutorial/episode_05_JavaScript_Execution_and_Dynamic_Content_Handling.md' + - '6. 
Magic Mode': 'tutorial/episode_06_Magic_Mode_and_Anti-Bot_Protection.md' + - '7. Content Cleaning': 'tutorial/episode_07_Content_Cleaning_and_Fit_Markdown.md' + - '8. Media Handling': 'tutorial/episode_08_Media_Handling:_Images,_Videos,_and_Audio.md' + - '9. Link Analysis': 'tutorial/episode_09_Link_Analysis_and_Smart_Filtering.md' + - '10. User Simulation': 'tutorial/episode_10_Custom_Headers,_Identity,_and_User_Simulation.md' + - '11.1. JSON CSS': 'tutorial/episode_11_1_Extraction_Strategies:_JSON_CSS.md' + - '11.2. LLM Strategy': 'tutorial/episode_11_2_Extraction_Strategies:_LLM.md' + - '11.3. Cosine Strategy': 'tutorial/episode_11_3_Extraction_Strategies:_Cosine.md' + - '12. Session Crawling': 'tutorial/episode_12_Session-Based_Crawling_for_Dynamic_Websites.md' + - '13. Text Chunking': 'tutorial/episode_13_Chunking_Strategies_for_Large_Text_Processing.md' + - '14. Custom Workflows': 'tutorial/episode_14_Hooks_and_Custom_Workflow_with_AsyncWebCrawler.md' + theme: name: terminal palette: dark -# Add the css/extra.css +markdown_extensions: + - pymdownx.highlight: + anchor_linenums: true + - pymdownx.inlinehilite + - pymdownx.snippets + - pymdownx.superfences + - admonition + - pymdownx.details + - attr_list + - tables + extra_css: - assets/styles.css - assets/highlight.css @@ -38,4 +83,4 @@ extra_css: extra_javascript: - assets/highlight.min.js - - assets/highlight_init.js + - assets/highlight_init.js \ No newline at end of file diff --git a/requirements-dev.txt b/requirements-dev.txt deleted file mode 100644 index f2578c6..0000000 --- a/requirements-dev.txt +++ /dev/null @@ -1,2 +0,0 @@ --r requirements.txt -pytest \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 7d21f5a..ed259ac 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,16 @@ -aiosqlite==0.20.0 -html2text==2024.2.26 -lxml==5.3.0 -litellm==1.48.0 -numpy>=1.26.0,<2.1.1 -pillow==10.4.0 -playwright==1.47.0 -python-dotenv==1.0.1 -requests>=2.26.0,<2.32.3 -beautifulsoup4==4.12.3 \ No newline at end of file +aiosqlite~=0.20 +html2text~=2024.2 +lxml~=5.3 +litellm~=1.48 +numpy>=1.26.0,<3 +pillow~=10.4 +playwright>=1.47,<1.48 +python-dotenv~=1.0 +requests~=2.26 +beautifulsoup4~=4.12 +tf-playwright-stealth~=1.0 +xxhash~=3.4 +rank-bm25~=0.2 +aiofiles~=24.0 +colorama~=0.4 +snowballstemmer~=2.2 \ No newline at end of file diff --git a/setup.py b/setup.py index b827e6a..f5f3cf2 100644 --- a/setup.py +++ b/setup.py @@ -5,35 +5,41 @@ import shutil import subprocess import sys +import asyncio # Create the .crawl4ai folder in the user's home directory if it doesn't exist # If the folder already exists, remove the cache folder -crawl4ai_folder = Path.home() / ".crawl4ai" +crawl4ai_folder = os.getenv("CRAWL4_AI_BASE_DIRECTORY", Path.home()) / ".crawl4ai" cache_folder = crawl4ai_folder / "cache" +content_folders = ['html_content', 'cleaned_html', 'markdown_content', + 'extracted_content', 'screenshots'] +# Clean up old cache if exists if cache_folder.exists(): shutil.rmtree(cache_folder) +# Create new folder structure crawl4ai_folder.mkdir(exist_ok=True) cache_folder.mkdir(exist_ok=True) +for folder in content_folders: + (crawl4ai_folder / folder).mkdir(exist_ok=True) -# Read the requirements from requirements.txt +# Read requirements and version __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) with open(os.path.join(__location__, "requirements.txt")) as f: requirements = f.read().splitlines() -# Read version from __init__.py -with open("crawl4ai/__init__.py") as f: 
+with open("crawl4ai/__version__.py") as f: for line in f: if line.startswith("__version__"): version = line.split("=")[1].strip().strip('"') break -# Define the requirements for different environments +# Define requirements default_requirements = requirements -torch_requirements = ["torch", "nltk", "spacy", "scikit-learn"] -transformer_requirements = ["transformers", "tokenizers", "onnxruntime"] -cosine_similarity_requirements = ["torch", "transformers", "nltk", "spacy"] +torch_requirements = ["torch", "nltk", "scikit-learn"] +transformer_requirements = ["transformers", "tokenizers"] +cosine_similarity_requirements = ["torch", "transformers", "nltk" ] sync_requirements = ["selenium"] def install_playwright(): @@ -48,10 +54,24 @@ def install_playwright(): print(f"Unexpected error during Playwright installation: {e}") print("Please run 'python -m playwright install' manually after the installation.") +def run_migration(): + """Initialize database during installation""" + try: + print("Starting database initialization...") + from crawl4ai.async_database import async_db_manager + asyncio.run(async_db_manager.initialize()) + print("Database initialization completed successfully.") + except ImportError: + print("Warning: Database module not found. Will initialize on first use.") + except Exception as e: + print(f"Warning: Database initialization failed: {e}") + print("Database will be initialized on first use") + class PostInstallCommand(install): def run(self): install.run(self) install_playwright() + # run_migration() setup( name="Crawl4AI", @@ -64,7 +84,7 @@ def run(self): author_email="unclecode@kidocode.com", license="MIT", packages=find_packages(), - install_requires=default_requirements + ["playwright"], # Add playwright to default requirements + install_requires=default_requirements + ["playwright", "aiofiles"], # Added aiofiles extras_require={ "torch": torch_requirements, "transformer": transformer_requirements, @@ -75,6 +95,7 @@ def run(self): entry_points={ 'console_scripts': [ 'crawl4ai-download-models=crawl4ai.model_loader:main', + 'crawl4ai-migrate=crawl4ai.migrations:main', # Added migration command ], }, classifiers=[ diff --git a/tests/async/sample_wikipedia.html b/tests/async/sample_wikipedia.html new file mode 100644 index 0000000..a22b3e3 --- /dev/null +++ b/tests/async/sample_wikipedia.html @@ -0,0 +1,2179 @@ + + +Apple - Wikipedia + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Jump to content +
    [Sample page chrome omitted: the stripped head, scripts, and navigation menus of the bundled Wikipedia article "Apple" ("From Wikipedia, the free encyclopedia") are not reproduced here; the article's taxobox and opening sections follow.]
    'Cripps Pink' apples
    Flowers
    Scientific classification
    Kingdom: Plantae
    Clade: Tracheophytes
    Clade: Angiosperms
    Clade: Eudicots
    Clade: Rosids
    Order: Rosales
    Family: Rosaceae
    Genus: Malus
    Species: M. domestica
    Binomial name: Malus domestica
    Synonyms[1][2]
    • M. communis Desf., 1768
    • M. pumila Mill.
    • M. frutescens Medik.
    • M. paradisiaca (L.) Medikus
    • M. sylvestris Mill.
    • Pyrus malus L.
    • Pyrus malus var. paradisiaca L.
    • Pyrus dioica Moench

    An apple is a round, edible fruit produced by an apple tree (Malus spp., among them the domestic or orchard apple; Malus domestica). Apple trees are cultivated worldwide and are the most widely grown species in the genus Malus. The tree originated in Central Asia, where its wild ancestor, Malus sieversii, is still found. Apples have been grown for thousands of years in Eurasia and were introduced to North America by European colonists. Apples have religious and mythological significance in many cultures, including Norse, Greek, and European Christian tradition. +

    Apples grown from seed tend to be very different from those of their parents, and the resultant fruit frequently lacks desired characteristics. For commercial purposes, including botanical evaluation, apple cultivars are propagated by clonal grafting onto rootstocks. Apple trees grown without rootstocks tend to be larger and much slower to fruit after planting. Rootstocks are used to control the speed of growth and the size of the resulting tree, allowing for easier harvesting. +

    There are more than 7,500 cultivars of apples. Different cultivars are bred for various tastes and uses, including cooking, eating raw, and cider or apple juice production. Trees and fruit are prone to fungal, bacterial, and pest problems, which can be controlled by a number of organic and non-organic means. In 2010, the fruit's genome was sequenced as part of research on disease control and selective breeding in apple production. +

    + +

    Etymology

    +

    The word apple, whose Old English ancestor is æppel, is descended from the Proto-Germanic noun *aplaz, descended in turn from Proto-Indo-European *h₂ébōl.[3] As late as the 17th century, the word also functioned as a generic term for all fruit, including nuts. This can be compared to the 14th-century Middle English expression appel of paradis, meaning a banana.[4] +

    +

    Description

    +

    The apple is a deciduous tree, generally standing 2 to 4.5 metres (6 to 15 feet) tall in cultivation and up to 15 m (49 ft) in the wild, though more typically 2 to 10 m (6.5 to 33 ft).[5][1] When cultivated, the size, shape and branch density are determined by rootstock selection and trimming method.[5] Apple trees may naturally have a rounded to erect crown with a dense canopy of leaves.[6] The bark of the trunk is dark gray or gray-brown, but young branches are reddish or dark-brown with a smooth texture.[1][7] When young, twigs are covered in very fine downy hairs, which are lost as the twigs grow older.[7] +

    The buds are egg-shaped and dark red or purple in color; they range in size from 3 to 5 millimeters, but are usually less than 4 mm. The bud scales have very hairy edges. When emerging from the buds, the leaves are convolute, meaning that their edges overlap each other.[1] Leaves can be simple ovals (elliptic), medium or wide in width, somewhat egg-shaped with the wider portion toward their base (ovate), or even with sides that are more parallel to each other instead of curved (oblong) with a narrow pointed end.[7][1] The edges have broadly-angled teeth, but do not have lobes. The top surface of the leaves is glabrescent, almost hairless, while the undersides are densely covered in fine hairs.[1] The leaves are attached alternately by short leaf stems 1 to 3.5 cm (½ to 1½ in) long.[6][1] +

    Blossoms are produced in spring simultaneously with the budding of the leaves and are produced on spurs and some long shoots.[5] When the flower buds first begin to open, the petals are rose-pink and fade to white or light pink when fully open, with each flower 3 to 4 cm (1 to 1½ in) in diameter.[1] The five-petaled flowers are grouped in an inflorescence consisting of a cyme with 3–7 flowers.[8] The central flower of the inflorescence is called the "king bloom"; it opens first and can develop a larger fruit.[6] Open apple blossoms are damaged by even brief exposures to temperatures −2 °C (28 °F) or less, although the overwintering wood and buds are hardy down to −40 °C (−40 °F).[8] +

    + +

    Fruit

    +

    The fruit is a pome that matures in late summer or autumn.[1] The true fruits or carpels are the harder interior chambers inside the apple's core. There are usually five carpels inside an apple, but there may be as few as three. Each of the chambers contains one or two seeds.[9] The edible flesh is formed from the receptacle at the base of the flower.[10] +

    + +

    The seeds are egg- to pear-shaped and may be colored from light brown or tan to a very dark brown, often with red shades or even purplish-black. They may have a blunt or sharp point.[11] The five sepals remain attached and stand out from the surface of the apple.[1] +

    The size of the fruit varies widely between cultivars, but generally has a diameter between 2.5 and 12 cm (1 and 5 in).[7] The shape is quite variable and may be nearly round, elongated, conical, or short and wide.[12] +

    The ground color of ripe apples is yellow, green, yellow-green or whitish yellow. The over color of ripe apples can be orange-red, pink-red, red, purple-red or brown-red. The over color amount can be 0–100%.[13] The skin may be wholly or partly russeted, making it rough and brown. The skin is covered in a protective layer of epicuticular wax.[14] The skin may also be marked with scattered dots.[1] The flesh is generally pale yellowish-white, though it can be pink, yellow or green.[13] +

    + +

    Chemistry

    +

    Important volatile compounds in apples that contribute to their scent and flavour include acetaldehyde, ethyl acetate, 1-butanal, ethanol, 2-methylbutanal, 3-methylbutanal, ethyl propionate, ethyl 2-methylpropionate, ethyl butyrate, ethyl 2-methyl butyrate, hexanal, 1-butanol, 3-methylbutyl acetate, 2-methylbutyl acetate, 1-propyl butyrate, ethyl pentanoate, amyl acetate, 2-methyl-1-butanol, trans-2-hexenal, ethyl hexanoate, hexanol.[15][16] +

    +

    Taxonomy

    +

    The apple as a species has more than 100 alternative scientific names, or synonyms.[17] In modern times, Malus pumila and Malus domestica are the two main names in use. M. pumila is the older name, but M. domestica has become much more commonly used starting in the 21st century, especially in the western world. Two proposals were made to make M. domestica a conserved name: the earlier proposal was voted down by the Committee for Vascular Plants of the IAPT in 2014, but in April 2017 the Committee decided, with a narrow majority, that the newly popular name should be conserved.[18] The General Committee of the IAPT decided in June 2017 to approve this change, officially conserving M. domestica.[19] Nevertheless, some works published after 2017 still use M. pumila as the correct name, under an alternate taxonomy.[2] +

    When first classified by Linnaeus in 1753, the pears, apples, and quinces were combined into one genus that he named Pyrus, and he named the apple Pyrus malus. This was widely accepted; however, in 1754 the botanist Philip Miller published an alternate classification in The Gardeners Dictionary, separating the apple species from Pyrus. He did not clearly indicate that by Malus pumila he meant the domesticated apple; nonetheless, it was used as such by many botanists. When Moritz Balthasar Borkhausen published his scientific description of the apple in 1803, it may have been a new combination of P. malus var. domestica, but this was not directly referenced by Borkhausen.[17] The earliest use of var. domestica for the apple was by Georg Adolf Suckow in 1786.[2] +

    +

    Genome

    + +

    Apples are diploid, with two sets of chromosomes per cell (though triploid cultivars, with three sets, are not uncommon), have 17 chromosomes and an estimated genome size of approximately 650 Mb. Several whole genome sequences have been completed and made available. The first one in 2010 was based on the diploid cultivar 'Golden Delicious'.[20] However, this first whole genome sequence contained several errors,[21] in part owing to the high degree of heterozygosity in diploid apples which, in combination with an ancient genome duplication, complicated the assembly. Recently, double- and trihaploid individuals have been sequenced, yielding whole genome sequences of higher quality.[22][23] +

    The first whole genome assembly was estimated to contain around 57,000 genes,[20] though the more recent genome sequences support estimates between 42,000 and 44,700 protein-coding genes.[22][23] The availability of whole genome sequences has provided evidence that the wild ancestor of the cultivated apple most likely is Malus sieversii. Re-sequencing of multiple accessions has supported this, while also suggesting extensive introgression from Malus sylvestris following domestication.[24] +

    +

    Cultivation

    +

    History

    +
    Map of the origins of the cultivated apple. The wild origin is in Kazakhstan; hybridisations and repeated domestications followed, modifying many attributes of the fruit.[24]
    +
    color photograph of a hand holding a red apple
    Wild Malus sieversii apple in Kazakhstan
    +

    Central Asia is generally considered the center of origin for apples due to the genetic variability in specimens there.[25] The wild ancestor of Malus domestica was Malus sieversii, found growing wild in the mountains of Central Asia in southern Kazakhstan, Kyrgyzstan, Tajikistan, and northwestern China.[5][26] Cultivation of the species, most likely beginning on the forested flanks of the Tian Shan mountains, progressed over a long period of time and permitted secondary introgression of genes from other species into the open-pollinated seeds. Significant exchange with Malus sylvestris, the crabapple, resulted in populations of apples being more related to crabapples than to the more morphologically similar progenitor Malus sieversii. In strains without recent admixture the contribution of the latter predominates.[27][28][29] +

    The apple is thought to have been domesticated 4,000–10,000 years ago in the Tian Shan mountains, and then to have travelled along the Silk Road to Europe, with hybridization and introgression of wild crabapples from Siberia (M. baccata), the Caucasus (M. orientalis), and Europe (M. sylvestris). Only the M. sieversii trees growing on the western side of the Tian Shan mountains contributed genetically to the domesticated apple, not the isolated population on the eastern side.[24] +

    Chinese soft apples, such as M. asiatica and M. prunifolia, have been cultivated as dessert apples for more than 2,000 years in China. These are thought to be hybrids between M. baccata and M. sieversii in Kazakhstan.[24] +

    Among the traits selected for by human growers are size, fruit acidity, color, firmness, and soluble sugar. Unusually for domesticated fruits, the wild M. sieversii origin is only slightly smaller than the modern domesticated apple.[24] +

    At the Sammardenchia-Cueis site near Udine in Northeastern Italy, seeds from some form of apples have been found in material carbon dated to between 6570 and 5684 BCE.[30] Genetic analysis has not yet been successfully used to determine whether such ancient apples were wild Malus sylvestris or Malus domesticus containing Malus sieversii ancestry. It is hard to distinguish in the archeological record between foraged wild apples and apple plantations.[31] +

    There is indirect evidence of apple cultivation in the third millennium BCE in the Middle East.[31] There is direct evidence, in the form of apple cores, dated to the 10th century BCE from a Judean site between the Sinai and Negev.[32] There was substantial apple production in European classical antiquity, and grafting was certainly known then.[31] Grafting is an essential part of modern domesticated apple production, to be able to propagate the best cultivars; it is unclear when apple tree grafting was invented.[31] +

    + +

    The Roman writer Pliny the Elder describes a method of storage for apples from his time in the 1st century. He says they should be placed in a room with good air circulation from a north-facing window, on a bed of straw, chaff, or mats, with windfalls kept separately.[33] Though methods like this will extend the availability of reasonably fresh apples, without refrigeration their lifespan is limited. Even sturdy winter apple varieties will only keep well until December in cool climates.[34] For longer storage, medieval Europeans strung up cored and peeled apples to dry, either whole or sliced into rings.[35] +

    Of the many Old World plants that the Spanish introduced to Chiloé Archipelago in the 16th century, apple trees became particularly well adapted.[36] Apples were introduced to North America by colonists in the 17th century,[5] and the first named apple cultivar was introduced in Boston by Reverend William Blaxton in 1640.[37] The only apples native to North America are crab apples.[38] +

    Apple cultivars brought as seed from Europe were spread along Native American trade routes, as well as being cultivated on colonial farms. An 1845 United States apples nursery catalogue sold 350 of the "best" cultivars, showing the proliferation of new North American cultivars by the early 19th century.[38] In the 20th century, irrigation projects in Eastern Washington began and allowed the development of the multibillion-dollar fruit industry, of which the apple is the leading product.[5] +

    Until the 20th century, farmers stored apples in frostproof cellars during the winter for their own use or for sale. Improved transportation of fresh apples by train and road replaced the necessity for storage.[39][40] Controlled atmosphere facilities are used to keep apples fresh year-round. Controlled atmosphere facilities use high humidity, low oxygen, and controlled carbon dioxide levels to maintain fruit freshness. They were first researched at Cambridge University in the 1920s and first used in the United States in the 1950s.[41] +

    +

    Breeding

    + +
    An apple tree in Germany
    +

    Many apples grow readily from seeds. However, apples must be propagated asexually to obtain cuttings with the characteristics of the parent. This is because seedling apples are "extreme heterozygotes". Rather than resembling their parents, seedlings are all different from each other and from their parents.[42] Triploid cultivars have an additional reproductive barrier in that three sets of chromosomes cannot be divided evenly during meiosis, yielding unequal segregation of the chromosomes (aneuploids). Even in the case when a triploid plant can produce a seed (apples are an example), it occurs infrequently, and seedlings rarely survive.[43] +

    Because apples do not breed true when planted as seeds, propagation usually involves grafting of cuttings. The rootstock used for the bottom of the graft can be selected to produce trees of a large variety of sizes, as well as changing the winter hardiness, insect and disease resistance, and soil preference of the resulting tree. Dwarf rootstocks can be used to produce very small trees (less than 3.0 m or 10 ft high at maturity), which bear fruit many years earlier in their life cycle than full-size trees, and are easier to harvest.[44] +

    Dwarf rootstocks for apple trees can be traced as far back as 300 BCE, to the area of Persia and Asia Minor. Alexander the Great sent samples of dwarf apple trees to Aristotle's Lyceum. Dwarf rootstocks became common by the 15th century and later went through several cycles of popularity and decline throughout the world.[45] The majority of the rootstocks used to control size in apples were developed in England in the early 1900s. The East Malling Research Station conducted extensive research into rootstocks, and their rootstocks are given an "M" prefix to designate their origin. Rootstocks marked with an "MM" prefix are Malling-series cultivars later crossed with trees of 'Northern Spy' in Merton, England.[46] +

    Most new apple cultivars originate as seedlings, which either arise by chance or are bred by deliberately crossing cultivars with promising characteristics.[47] The words "seedling", "pippin", and "kernel" in the name of an apple cultivar suggest that it originated as a seedling. Apples can also form bud sports (mutations on a single branch). Some bud sports turn out to be improved strains of the parent cultivar. Some differ sufficiently from the parent tree to be considered new cultivars.[48] +

    Apples have been acclimatized in Ecuador at very high altitudes, where they can often, with the needed factors, provide crops twice per year because of constant temperate conditions year-round.[49] +

    +

    Pollination

    + +
    Apple blossom from an old Ayrshire cultivar
    +
    An orchard mason bee on an apple bloom in British Columbia, Canada
    +

    Apples are self-incompatible; they must cross-pollinate to develop fruit. During the flowering each season, apple growers often utilize pollinators to carry pollen. Honey bees are most commonly used. Orchard mason bees are also used as supplemental pollinators in commercial orchards. Bumblebee queens are sometimes present in orchards, but not usually in sufficient number to be significant pollinators.[48][50] +

    Cultivars are sometimes classified by the day of peak bloom in the average 30-day blossom period, with pollinizers selected from cultivars within a 6-day overlap period. There are four to seven pollination groups in apples, depending on climate: +

    +
    • Group A – Early flowering, 1 to 3 May in England ('Gravenstein', 'Red Astrachan')
    • Group B – 4 to 7 May ('Idared', 'McIntosh')
    • Group C – Mid-season flowering, 8 to 11 May ('Granny Smith', 'Cox's Orange Pippin')
    • Group D – Mid/late season flowering, 12 to 15 May ('Golden Delicious', 'Calville blanc d'hiver')
    • Group E – Late flowering, 16 to 18 May ('Braeburn', 'Reinette d'Orléans')
    • Group F – 19 to 23 May ('Suntan')
    • Group H – 24 to 28 May ('Court-Pendu Gris' – also called Court-Pendu plat)
    +

    One cultivar can be pollinated by a compatible cultivar from the same group or close (A with A, or A with B, but not A with C or D).[51] +

    +

    Maturation and harvest

    + +
    L. K. Relander, the former President of Finland, with his family picking apples in the 1930s
    +

    Cultivars vary in their yield and the ultimate size of the tree, even when grown on the same rootstock. Some cultivars, if left unpruned, grow very large—letting them bear more fruit, but making harvesting more difficult. Depending on tree density (number of trees planted per unit surface area), mature trees typically bear 40–200 kg (90–440 lb) of apples each year, though productivity can be close to zero in poor years. Apples are harvested using three-point ladders that are designed to fit amongst the branches. Trees grafted on dwarfing rootstocks bear about 10–80 kg (20–180 lb) of fruit per year.[48] +

    Some farms with apple orchards open them to the public so consumers can pick their own apples.[52] +

    Crops ripen at different times of the year according to the cultivar. Cultivars that yield their crop in the summer include 'Sweet Bough' and 'Duchess'; fall producers include 'Blenheim'; winter producers include 'King', 'Swayzie', and 'Tolman Sweet'.[38] +

    +

    Storage

    +
    Different apple cultivars in a wholesale food market
    +

    Commercially, apples can be stored for months in controlled atmosphere chambers. Apples are commonly stored in chambers with lowered concentrations of oxygen to reduce respiration and slow softening and other changes if the fruit is already fully ripe. The gas ethylene is used by plants as a hormone which promotes ripening, decreasing the time an apple can be stored. For storage longer than about six months the apples are picked earlier, before full ripeness, when ethylene production by the fruit is low. However, in many varieties this increases their sensitivity to carbon dioxide, which also must be controlled.[53] +

    For home storage, most cultivars of apple can be stored for three weeks in a pantry and four to six weeks from the date of purchase in a refrigerator that maintains 4 to 0 °C (39 to 32 °F).[54][55] Some varieties of apples (e.g. 'Granny Smith' and 'Fuji') have more than three times the storage life of others.[56] +

    Non-organic apples may be sprayed with the substance 1-methylcyclopropene, which blocks the apples' ethylene receptors, temporarily preventing them from ripening.[57] +

    +

    Pests and diseases

    + +
    Codling moth larva tunnelling inside an apple
    +

    Apple trees are susceptible to fungal and bacterial diseases, and to damage by insect pests. Many commercial orchards pursue a program of chemical sprays to maintain high fruit quality, tree health, and high yields. Organic production, by contrast, prohibits the use of synthetic pesticides, though some older pesticides are allowed. Organic methods include, for instance, introducing a pest's natural predators to reduce its population. +

    A wide range of pests and diseases can affect the plant. Three of the more common diseases or pests are mildew, aphids, and apple scab. +

    +
    • Mildew is characterized by light grey powdery patches appearing on the leaves, shoots and flowers, normally in spring. The flowers turn a creamy yellow color and do not develop correctly. This can be treated similarly to Botrytis: eliminating the conditions that caused the disease and burning the infected plants are among the recommended actions.[58]
    • Aphids are small insects with sucking mouthparts. Five species of aphids commonly attack apples: apple grain aphid, rosy apple aphid, apple aphid, spirea aphid, and the woolly apple aphid. The aphid species can be identified by color, time of year, and by differences in the cornicles (small paired projections from their rear).[59] Aphids feed on foliage using needle-like mouth parts to suck out plant juices. When present in high numbers, certain species reduce tree growth and vigor.[60]
    • Apple scab causes leaves to develop olive-brown spots with a velvety texture that later turn brown and become cork-like in texture. The disease also affects the fruit, which develops similar brown spots with velvety or cork-like textures. Apple scab is spread through fungus growing in old apple leaves on the ground and spreads during warm spring weather to infect the new year's growth.[61]
    +

    Among the most serious disease problems are fireblight, a bacterial disease, and three fungal diseases: Gymnosporangium rust, black spot,[62] and bitter rot.[63] Codling moths and apple maggots (the larvae of a fruit fly) cause serious damage to apple fruits, making them unsaleable. Young apple trees are also prone to mammal pests like mice and deer, which feed on the soft bark of the trees, especially in winter.[61] The larvae of the apple clearwing moth (red-belted clearwing) burrow through the bark and into the phloem of apple trees, potentially causing significant damage.[64] +

    +

    Cultivars

    + +
    An assortment of apple cultivars
    +

    There are more than 7,500 known cultivars (cultivated varieties) of apples.[65] Cultivars vary in their yield and the ultimate size of the tree, even when grown on the same rootstock.[66] Different cultivars are available for temperate and subtropical climates. The UK's National Fruit Collection, which is the responsibility of the Department for Environment, Food and Rural Affairs, includes a collection of over 2,000 cultivars of apple tree in Kent.[67] The University of Reading, which is responsible for developing the UK national collection database, provides access to search the national collection. The University of Reading's work is part of the European Cooperative Programme for Plant Genetic Resources, in which 38 countries participate in the Malus/Pyrus work group.[68] +

    The UK's national fruit collection database contains much information on the characteristics and origin of many apples, including alternative names for what is essentially the same "genetic" apple cultivar. Most of these cultivars are bred for eating fresh (dessert apples), though some are cultivated specifically for cooking (cooking apples) or producing cider. Cider apples are typically too tart and astringent to eat fresh, but they give the beverage a rich flavor that dessert apples cannot.[69] +

    In the United States there are many apple breeding programs associated with universities. Cornell University has had a program operating since 1880 in Geneva, New York. Among their recent well-known apples is the 'SnapDragon' cultivar released in 2013. In the west, Washington State University started a program to support the state's apple industry in 1994 and released the 'Cosmic Crisp' cultivar in 2017. The third most grown apple cultivar in the United States is the 'Honeycrisp', released by the University of Minnesota program in 1991.[70] Unusually for a popular cultivar, the 'Honeycrisp' is not directly related to another popular apple cultivar but instead to two unsuccessful cultivars.[71] In Europe there are also many breeding programs such as the Julius Kühn-Institut, the German federal research center for cultivated plants.[72] +

    Commercially popular apple cultivars are soft but crisp. Other desirable qualities in modern commercial apple breeding are a colorful skin, absence of russeting, ease of shipping, lengthy storage ability, high yields, disease resistance, common apple shape, and developed flavor.[66] Modern apples are generally sweeter than older cultivars, as popular tastes in apples have varied over time. Most North Americans and Europeans favor sweet, subacid apples, but tart apples have a strong minority following.[73] Extremely sweet apples with barely any acid flavor are popular in Asia,[73] especially the Indian subcontinent.[69] +

    +
    Less common apple cultivars from an orchard in Italy
    +

    Old cultivars are often oddly shaped, russeted, and grow in a variety of textures and colors. Some find them to have better flavor than modern cultivars, but they may have other problems that make them commercially unviable—low yield, disease susceptibility, poor tolerance for storage or transport, or just being the "wrong" size.[74] A few old cultivars are still produced on a large scale, but many have been preserved by home gardeners and farmers that sell directly to local markets. Many unusual and locally important cultivars with their own unique taste and appearance exist; apple conservation campaigns have sprung up around the world to preserve such local cultivars from extinction. In the United Kingdom, old cultivars such as 'Cox's Orange Pippin' and 'Egremont Russet' are still commercially important even though by modern standards they are low yielding and susceptible to disease.[5] +

    +

    Production

    Apple production, 2022 (millions of tonnes)
    China           47.6
    United States    4.8
    Turkey           4.4
    Poland           4.3
    India            2.6
    World           95.8
    Source: FAOSTAT of the United Nations[75]
    +

    World production of apples in 2022 was 96 million tonnes, with China producing 50% of the total (table).[75] Secondary producers were the United States, Turkey, and Poland.[75] +

    +

    Toxicity

    +

    Amygdalin

    +

    Apple seeds contain small amounts of amygdalin, a sugar and cyanide compound known as a cyanogenic glycoside. Ingesting small amounts of apple seeds causes no ill effects, but consumption of extremely large doses can cause adverse reactions. It may take several hours before the poison takes effect, as cyanogenic glycosides must be hydrolyzed before the cyanide ion is released.[76] The U.S. National Library of Medicine's Hazardous Substances Data Bank records no cases of amygdalin poisoning from consuming apple seeds.[77] +

    +

    Allergy

    +

    One form of apple allergy, often found in northern Europe, is called birch-apple syndrome and is found in people who are also allergic to birch pollen.[78] Allergic reactions are triggered by a protein in apples that is similar to birch pollen, and people affected by this protein can also develop allergies to other fruits, nuts, and vegetables. Reactions, which entail oral allergy syndrome (OAS), generally involve itching and inflammation of the mouth and throat,[78] but in rare cases can also include life-threatening anaphylaxis.[79] This reaction only occurs when raw fruit is consumed—the allergen is neutralized in the cooking process. The variety of apple, maturity and storage conditions can change the amount of allergen present in individual fruits. Long storage times can increase the amount of proteins that cause birch-apple syndrome.[78] +

    In other areas, such as the Mediterranean, some individuals have adverse reactions to apples because of their similarity to peaches.[78] This form of apple allergy also includes OAS, but often has more severe symptoms, such as vomiting, abdominal pain and urticaria, and can be life-threatening. Individuals with this form of allergy can also develop reactions to other fruits and nuts. Cooking does not break down the protein causing this particular reaction, so affected individuals cannot eat raw or cooked apples. Freshly harvested, over-ripe fruits tend to have the highest levels of the protein that causes this reaction.[78] +

    Breeding efforts have yet to produce a hypoallergenic fruit suitable for either of the two forms of apple allergy.[78] +

    +

    Uses

    + +

    Nutrition

    +
    + +
    Apples, with skin (edible parts)
    Nutritional value per 100 g (3.5 oz)

    Energy              218 kJ (52 kcal)
    Carbohydrates       13.81 g
      Sugars            10.39 g
      Dietary fiber      2.4 g
    Fat                  0.17 g
    Protein              0.26 g

    Vitamins                  Quantity   %DV
    Vitamin A equiv.          3 μg       0%
      beta-Carotene           27 μg      0%
      Lutein and zeaxanthin   29 μg
    Thiamine (B1)             0.017 mg   1%
    Riboflavin (B2)           0.026 mg   2%
    Niacin (B3)               0.091 mg   1%
    Pantothenic acid (B5)     0.061 mg   1%
    Vitamin B6                0.041 mg   2%
    Folate (B9)               3 μg       1%
    Vitamin C                 4.6 mg     5%
    Vitamin E                 0.18 mg    1%
    Vitamin K                 2.2 μg     2%

    Minerals                  Quantity   %DV
    Calcium                   6 mg       0%
    Iron                      0.12 mg    1%
    Magnesium                 5 mg       1%
    Manganese                 0.035 mg   2%
    Phosphorus                11 mg      1%
    Potassium                 107 mg     4%
    Sodium                    1 mg       0%
    Zinc                      0.04 mg    0%

    Other constituents        Quantity
    Water                     85.56 g

    Percentages estimated using US recommendations for adults,[80] except for potassium, which is estimated based on expert recommendation from the National Academies.[81]
    +
    +

    A raw apple is 86% water and 14% carbohydrates, with negligible content of fat and protein (table). A reference serving of a raw apple with skin weighing 100 g (3.5 oz) provides 52 calories and a moderate content of dietary fiber (table). Otherwise, there is low content of micronutrients, with the Daily Values of all falling below 10% (table). +

    +

    Culinary

    + +
    Machine for paring, coring, and slicing apples, from Henry B. Scammell's 1897 handbook Cyclopedia of Valuable Receipts
    +

    Apple varieties can be grouped as cooking apples, eating apples, and cider apples, the last so astringent as to be "almost inedible".[82] Apples are consumed as juice, raw in salads, baked in pies, cooked into sauces and apple butter, or baked.[83] They are sometimes used as an ingredient in savory foods, such as sausage and stuffing.[84] +

    Several techniques are used to preserve apples and apple products. Traditional methods include drying and making apple butter.[82] Juice and cider are produced commercially; cider is a significant industry in regions such as the West of England and Normandy.[82] +

    A toffee apple (UK) or caramel apple (US) is a confection made by coating an apple in hot toffee or caramel candy respectively and allowing it to cool.[85][8] Apples and honey are a ritual food pairing eaten during the Jewish New Year of Rosh Hashanah.[86] +

    Apples are an important ingredient in many desserts, such as pies, crumbles, and cakes. When cooked, some apple cultivars easily form a puree known as apple sauce, which can be cooked down to form a preserve, apple butter. They are often baked or stewed, and are cooked in some meat dishes.[82] +

    + +

    Apples are milled or pressed to produce apple juice, which may be drunk unfiltered (called apple cider in North America), or filtered. Filtered juice is often concentrated and frozen, then reconstituted later and consumed. Apple juice can be fermented to make cider (called hard cider in North America), ciderkin, and vinegar.[8] Through distillation, various alcoholic beverages can be produced, such as applejack, Calvados, and apple brandy.[8][87] +

    +

    Organic production

    +

    Organic apples are commonly produced in the United States.[88] Due to infestations by key insects and diseases, organic production is difficult in Europe.[89] The use of pesticides containing chemicals, such as sulfur, copper, microorganisms, viruses, clay powders, or plant extracts (pyrethrum, neem) has been approved by the EU Organic Standing Committee to improve organic yield and quality.[89] A light coating of kaolin, which forms a physical barrier to some pests, also may help prevent apple sun scalding.[48] +

    +

    Non-browning apples

    +

    Apple skins and seeds contain polyphenols.[90] These are oxidised by the enzyme polyphenol oxidase, which causes browning in sliced or bruised apples, by catalyzing the oxidation of phenolic compounds to o-quinones, a browning factor.[91] Browning reduces apple taste, color, and food value. Arctic apples, a non-browning group of apples introduced to the United States market in 2019, have been genetically modified to silence the expression of polyphenol oxidase, thereby delaying a browning effect and improving apple eating quality.[92][93] The US Food and Drug Administration in 2015, and Canadian Food Inspection Agency in 2017, determined that Arctic apples are as safe and nutritious as conventional apples.[94][95] +

    +

    Other products

    +

    Apple seed oil is obtained by pressing apple seeds for manufacturing cosmetics.[96] +

    +

    In culture

    + +

    Germanic paganism

    +
    Illustration of girl in a red dress, holding 3 candles in one hand and a basket of apples in the other
    "Brita as Iduna" (1901) by Carl Larsson
    +

    In Norse mythology, the goddess Iðunn is portrayed in the Prose Edda (written in the 13th century by Snorri Sturluson) as providing apples to the gods that give them eternal youthfulness. The English scholar H. R. Ellis Davidson links apples to religious practices in Germanic paganism, from which Norse paganism developed. She points out that buckets of apples were found in the Oseberg ship burial site in Norway, that fruit and nuts (Iðunn having been described as being transformed into a nut in Skáldskaparmál) have been found in the early graves of the Germanic peoples in England and elsewhere on the continent of Europe, which may have had a symbolic meaning, and that nuts are still a recognized symbol of fertility in southwest England.[97] +

    Davidson notes a connection between apples and the Vanir, a tribe of gods associated with fertility in Norse mythology, citing an instance of eleven "golden apples" being given to woo the beautiful Gerðr by Skírnir, who was acting as messenger for the major Vanir god Freyr in stanzas 19 and 20 of Skírnismál. Davidson also notes a further connection between fertility and apples in Norse mythology in chapter 2 of the Völsunga saga: when the major goddess Frigg sends King Rerir an apple after he prays to Odin for a child, Frigg's messenger (in the guise of a crow) drops the apple in his lap as he sits atop a mound.[97] Rerir's wife's consumption of the apple results in a six-year pregnancy and the birth (by Caesarean section) of their son—the hero Völsung.[98] +

    Further, Davidson points out the "strange" phrase "Apples of Hel" used in an 11th-century poem by the skald Thorbiorn Brúnarson. She states this may imply that the apple was thought of by Brúnarson as the food of the dead. Further, Davidson notes that the potentially Germanic goddess Nehalennia is sometimes depicted with apples and that parallels exist in early Irish stories. Davidson asserts that while cultivation of the apple in Northern Europe extends back to at least the time of the Roman Empire and came to Europe from the Near East, the native varieties of apple trees growing in Northern Europe are small and bitter. Davidson concludes that in the figure of Iðunn "we must have a dim reflection of an old symbol: that of the guardian goddess of the life-giving fruit of the other world."[97] +

    +

    Greek mythology

    +
    Heracles with the apple of Hesperides
    +

    Apples appear in many religious traditions, including Greek and Roman mythology where it has an ambiguous symbolism of discord, fertility, or courtship.[99] In Greek mythology, the Greek hero Heracles, as a part of his Twelve Labours, was required to travel to the Garden of the Hesperides and pick the golden apples off the Tree of Life growing at its center.[100] +

    The Greek goddess of discord, Eris, became disgruntled after she was excluded from the wedding of Peleus and Thetis.[101] In retaliation, she tossed a golden apple inscribed Καλλίστη (Kallistē, "For the most beautiful one"), into the wedding party. Three goddesses claimed the apple: Hera, Athena, and Aphrodite. Paris of Troy was appointed to select the recipient. After being bribed by both Hera and Athena, Aphrodite tempted him with the most beautiful woman in the world, Helen of Sparta. He awarded the apple to Aphrodite, thus indirectly causing the Trojan War.[102][103] +

    The apple was thus considered, in ancient Greece, sacred to Aphrodite. To throw an apple at someone was to symbolically declare one's love; and similarly, to catch it was to symbolically show one's acceptance of that love. An epigram claiming authorship by Plato states:[104] +

    +

    I throw the apple at you, and if you are willing to love me, take it and share your girlhood with me; but if your thoughts are what I pray they are not, even then take it, and consider how short-lived is beauty.

    — Plato, Epigram VII
    +

    Atalanta, also of Greek mythology, raced all her suitors in an attempt to avoid marriage. She outran all but Hippomenes (also known as Melanion, a name possibly derived from melon, the Greek word for both "apple" and fruit in general),[100] who defeated her by cunning, not speed. Hippomenes knew that he could not win in a fair race, so he used three golden apples (gifts of Aphrodite, the goddess of love) to distract Atalanta. It took all three apples and all of his speed, but Hippomenes was finally successful, winning the race and Atalanta's hand.[105][106] +

    +

    Celtic mythology

    +

    In Celtic mythology, the otherworld has many names, including Emain Ablach, "Emain of the Apple-trees". A version of this is Avalon in Arthurian legend, or in Welsh Ynys Afallon, "Island of Apples".[107] +

    +

    China

    +
    Píngānguǒ ("Peace apples") on sale in Beijing for Christmas Eve (2017)
    +

    In China, apples symbolise peace, since the sounds of the first element ("píng") in the words "apple" (苹果, Píngguǒ) and "peace" (平安, Píng'ān) are homophonous in Mandarin and Cantonese.[3][108] When these two words are combined, the word Píngānguǒ (平安果, "Peace apples") is formed. This association developed further as the name for Christmas Eve in Mandarin is Píngānyè (平安夜, "Peaceful/Quiet Evening"), which made the gifting of apples at this season to friends and associates popular, as a way to wish them peace and safety.[108] +

    +

    Christian art

    +
    Adam and Eve by Albrecht Dürer (1507), showcasing the apple as a symbol of sin
    +

    Though the forbidden fruit of Eden in the Book of Genesis is not identified, popular Christian tradition has held that it was an apple that Eve coaxed Adam to share with her.[109] The origin of the popular identification with a fruit unknown in the Middle East in biblical times is found in wordplay with the Latin words mālum (an apple) and mălum (an evil), each of which is normally written malum.[110] The tree of the forbidden fruit is called "the tree of the knowledge of good and evil" in Genesis 2:17,[111] and the Latin for "good and evil" is bonum et malum.[112] +

    Renaissance painters may also have been influenced by the story of the golden apples in the Garden of Hesperides. As a result, in the story of Adam and Eve, the apple became a symbol for knowledge, immortality, temptation, the fall of man into sin, and sin itself. The larynx in the human throat has been called the "Adam's apple" because of a notion that it was caused by the forbidden fruit remaining in the throat of Adam. The apple as symbol of sexual seduction has been used to imply human sexuality, possibly in an ironic vein.[109] +

    +

    Proverb

    +

    The proverb, "An apple a day keeps the doctor away", addressing the supposed health benefits of the fruit, has been traced to 19th-century Wales, where the original phrase was "Eat an apple on going to bed, and you'll keep the doctor from earning his bread".[113] In the 19th century and early 20th, the phrase evolved to "an apple a day, no doctor to pay" and "an apple a day sends the doctor away"; the phrasing now commonly used was first recorded in 1922.[114] +

    +

    See also

    + +

    References

    +
    +
      +
    1. ^ Jump up to: a b c d e f g h i j k Dickson, Elizabeth E. (28 May 2021). "Malus domestica". Flora of North America. Archived from the original on 28 July 2024. Retrieved 27 July 2024. +
    2. +
    3. ^ Jump up to: a b c "Malus domestica (Suckow) Borkh". Plants of the World Online. Royal Botanic Gardens, Kew. Retrieved 31 July 2024. +
    4. +
    5. ^ Jump up to: a b Lim, Lisa (6 July 2021). "Where the word 'apple' came from and why the forbidden fruit was unlucky to be linked with the fall of man". Language Matters. South China Morning Post. Hong Kong, China: Alibaba Group. Archived from the original on 28 June 2023. Retrieved 28 June 2023. +
    6. +
    7. ^ "Origin and meaning of "apple" by Online Etymology Dictionary". Online Etymology Dictionary. Archived from the original on 21 December 2019. Retrieved 22 November 2019. +
    8. +
    9. ^ Jump up to: a b c d e f g Rieger, Mark. "Apple - Malus domestica". HORT 3020: Intro Fruit Crops. University of Georgia. Archived from the original on 21 January 2008. Retrieved 22 January 2008. +
    10. +
    11. ^ Jump up to: a b c "Apples - Malus domestica". North Carolina Extension Gardener Plant Toolbox. North Carolina State University. Archived from the original on 31 May 2024. Retrieved 31 July 2024. +
    12. +
    13. ^ Jump up to: a b c d Heil, Kenneth D.; O'Kane, Jr., Steve L.; Reeves, Linda Mary; Clifford, Arnold (2013). Flora of the Four Corners Region: Vascular Plants of the San Juan River Drainage, Arizona, Colorado, New Mexico, and Utah (First ed.). St. Louis, Missouri: Missouri Botanical Garden. p. 909. ISBN 978-1-930723-84-9. ISSN 0161-1542. LCCN 2012949654. OCLC 859541992. Retrieved 27 July 2024. +
    14. +
    15. ^ Jump up to: a b c d e Lim, Tong Kwee (2012). "Malus x domestica". Edible Medicinal and Non-Medicinal Plants. Vol. 4, Fruit (First ed.). Dordrecht, the Netherlands: Springer. pp. 414–415. doi:10.1007/978-94-007-4053-2_49. ISBN 978-94-007-4053-2. OCLC 795503871. +
    16. +
    17. ^ Juniper, Barrie E.; Mabberley, David J. (2006). The Story of the Apple (First ed.). Portland, Oregon: Timber Press. p. 27. ISBN 978-0-88192-784-9. LCCN 2006011869. OCLC 67383484. Retrieved 1 August 2024. +
    18. +
    19. ^ "Fruit glossary". Royal Horticultural Society. Archived from the original on 7 August 2024. Retrieved 7 August 2024. +
    20. +
    21. ^ Burford, Tom (2013). Apples of North America : 192 Exceptional Varieties for Gardeners, Growers and Cooks (First ed.). Portland, Oregon: Timber Press. pp. 22, 50, 55, 122, 123, 137, 141, 147, 159, 245, 246. ISBN 978-1-60469-249-5. LCCN 2012045130. OCLC 819860825. +
    22. +
    23. ^ "Shape". Western Agricultural Research Center. Montana State University. Archived from the original on 23 April 2024. Retrieved 30 July 2024. +
    24. +
    25. ^ Jump up to: a b Janick, Jules; Cummins, James N.; Brown, Susan K.; Hemmat, Minou (1996). "Chapter 1: Apples" (PDF). Fruit Breeding. Vol. I: Tree and Tropical Fruits. New York: John Wiley & Sons. pp. 9, 48. ISBN 978-0-471-31014-3. LCCN 95016407. OCLC 1302621533. Archived (PDF) from the original on 19 July 2013. Retrieved 30 August 2024. +
    26. +
    27. ^ "Natural Waxes on Fruits". Postharvest.tfrec.wsu.edu. 29 October 2010. Archived from the original on 24 May 2013. Retrieved 14 June 2013. +
    28. +
    29. ^ Flath, R. A.; Black, D. R.; Forrey, R. R.; McDonald, G. M.; Mon, T. R.; Teranishi, R. (1 August 1969). "Volatiles in Gravenstein Apple Essence Identified by GC-Mass Spectrometry". Journal of Chromatographic Science. 7 (8): 508. doi:10.1093/CHROMSCI/7.8.508. +
    30. +
    31. ^ Flath, Robert A.; Black, Dale Robert.; Guadagni, Dante G.; McFadden, William H.; Schultz, Thomas H. (January 1967). "Identification and organoleptic evaluation of compounds in Delicious apple essence". Journal of Agricultural and Food Chemistry. 15 (1): 29. doi:10.1021/jf60149a032. +
    32. +
    33. ^ Jump up to: a b Qian, Guan-Ze; Liu, Lian-Fen; Tang, Geng-Guo (April 2010). "(1933) Proposal to conserve the name Malus domestica against M. pumila, M. communis, M. frutescens, and Pyrus dioica ( Rosaceae )". Taxon. 59 (2): 650–652. doi:10.1002/tax.592038. +
    34. +
    35. ^ Applequist, Wendy L. (2017). "Report of the Nomenclature Committee for Vascular Plants: 69" (PDF). Taxon. 66 (2): 500–513. doi:10.12705/662.17. Archived (PDF) from the original on 7 May 2024. +
    36. +
    37. ^ Wilson, Karen L. (June 2017). "Report of the General Committee: 18". Taxon. 66 (3): 742. doi:10.12705/663.15. +
    38. +
    39. ^ Jump up to: a b Velasco, Riccardo; Zharkikh, Andrey; Affourtit, Jason; Dhingra, Amit; Cestaro, Alessandro; et al. (2010). "The genome of the domesticated apple (Malus × domestica Borkh.)". Nature Genetics. 42 (10): 833–839. doi:10.1038/ng.654. PMID 20802477. S2CID 14854514. +
    40. +
    41. ^ Di Pierro, Erica A.; Gianfranceschi, Luca; Di Guardo, Mario; Koehorst-Van Putten, Herma J.J.; Kruisselbrink, Johannes W.; et al. (2016). "A high-density, multi-parental SNP genetic map on apple validates a new mapping approach for outcrossing species". Horticulture Research. 3 (1): 16057. Bibcode:2016HorR....316057D. doi:10.1038/hortres.2016.57. PMC 5120355. PMID 27917289. +
    42. +
    43. ^ Jump up to: a b Daccord, Nicolas; Celton, Jean-Marc; Linsmith, Gareth; et al. (2017). "High-quality de novo assembly of the apple genome and methylome dynamics of early fruit development". Nature Genetics. 49 (7). Nature Communications: 1099–1106. doi:10.1038/ng.3886. hdl:10449/42064. PMID 28581499. S2CID 24690391. +
    44. +
    45. ^ Jump up to: a b Zhang, Liyi; Hu, Jiang; Han, Xiaolei; Li, Jingjing; Gao, Yuan; et al. (2019). "A high-quality apple genome assembly reveals the association of a retrotransposon and red fruit colour". Nature Communications. 10 (1). Nature Genetics: 1494. Bibcode:2019NatCo..10.1494Z. doi:10.1038/s41467-019-09518-x. PMC 6445120. PMID 30940818. +
    46. +
    47. ^ Jump up to: a b c d e Duan, Naibin; Bai, Yang; Sun, Honghe; Wang, Nan; Ma, Yumin; et al. (2017). "Genome re-sequencing reveals the history of apple and supports a two-stage model for fruit enlargement". Nature Communications. 8 (1): 249. Bibcode:2017NatCo...8..249D. doi:10.1038/s41467-017-00336-7. PMC 5557836. PMID 28811498. +
    48. +
    49. ^ Richards, Christopher M.; Volk, Gayle M.; Reilley, Ann A.; Henk, Adam D.; Lockwood, Dale R.; et al. (2009). "Genetic diversity and population structure in Malus sieversii, a wild progenitor species of domesticated apple". Tree Genetics & Genomes. 5 (2): 339–347. doi:10.1007/s11295-008-0190-9. S2CID 19847067. +
    50. +
    51. ^ Lauri, Pierre-éric; Maguylo, Karen; Trottier, Catherine (March 2006). "Architecture and size relations: an essay on the apple (Malus × domestica, Rosaceae) tree". American Journal of Botany. 93 (3): 357–368. doi:10.3732/ajb.93.3.357. PMID 21646196. Archived from the original on 20 April 2019. Retrieved 27 July 2024. +
    52. +
    53. ^ Cornille, Amandine; Gladieux, Pierre; Smulders, Marinus J. M.; Roldán-Ruiz, Isabel; Laurens, François; et al. (2012). Mauricio, Rodney (ed.). "New Insight into the History of Domesticated Apple: Secondary Contribution of the European Wild Apple to the Genome of Cultivated Varieties". PLOS Genetics. 8 (5): e1002703. doi:10.1371/journal.pgen.1002703. PMC 3349737. PMID 22589740. +
    54. +
    55. ^ Kean, Sam (17 May 2012). "ScienceShot: The Secret History of the Domesticated Apple". Archived from the original on 11 June 2016. +
    56. +
    57. ^ Coart, E.; Van Glabeke, S.; De Loose, M.; Larsen, A.S.; Roldán-Ruiz, I. (2006). "Chloroplast diversity in the genus Malus: new insights into the relationship between the European wild apple (Malus sylvestris (L.) Mill.) and the domesticated apple (Malus domestica Borkh.)". Mol. Ecol. 15 (8): 2171–2182. Bibcode:2006MolEc..15.2171C. doi:10.1111/j.1365-294x.2006.02924.x. PMID 16780433. S2CID 31481730. +
    58. +
    59. ^ Rottoli, Mauro; Pessina, Andrea (2007). "Chapter 9: Neolithic agriculture in Italy: an update of archaeobotanical data with particular emphasis on northern settlements". In Colledge, Sue; Conolly, James (eds.). The Origins and Spread of Domestic Plants in Southwest Asia and Europe (First ed.). Walnut Creek, California: Left Coast Press; University College London Institute of Archaeology Publications. pp. 142–143. ISBN 978-1-59874-988-5. OCLC 84838157. +
    60. +
    61. ^ Jump up to: a b c d Schlumbaum, Angela; van Glabeke, Sabine; Roldan-Ruiz, Isabel (January 2012). "Towards the onset of fruit tree growing north of the Alps: Ancient DNA from waterlogged apple (Malus sp.) seed fragments". Annals of Anatomy - Anatomischer Anzeiger. 194 (1): 157–162. doi:10.1016/j.aanat.2011.03.004. PMID 21501956. +
    62. +
    63. ^ Sauer, Jonathan D. (1993). Historical Geography of Crop Plants: A Select Roster (First ed.). Boca Raton, Florida: CRC Press. pp. 109–113. ISBN 978-0-8493-8901-6. LCCN 92045590. OCLC 27224696. +
    64. +
    65. ^ Plinius, Gaius Secundus (1855). The Natural History of Pliny. Vol. III. Translated by Bostock, John; Riley, Henry T. London: Henry G. Bohn. p. 303. Retrieved 3 August 2024. +
    66. +
    67. ^ Martin, Alice A. (1976). All About Apples (First ed.). Boston, Massachusetts: Houghton Mifflin Company. pp. 64–65. ISBN 978-0-395-20724-6. OCLC 1733691. Retrieved 3 August 2024. +
    68. +
    69. ^ Adamson, Melitta Weiss (2004). Food in Medieval Times (First ed.). Westport, Connecticut: Greenwood Press. pp. 19–20. ISBN 978-0-313-32147-4. LCCN 2004014054. OCLC 55738647. +
    70. +
      \ No newline at end of file diff --git a/tests/async/test_async_doanloader.py b/tests/async/test_async_doanloader.py new file mode 100644 index 0000000..4798b4c --- /dev/null +++ b/tests/async/test_async_doanloader.py @@ -0,0 +1,229 @@ +import os +import sys +import asyncio +import shutil +from typing import List +import tempfile +import time + +# Add the parent directory to the Python path +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(parent_dir) + +from crawl4ai.async_webcrawler import AsyncWebCrawler + +class TestDownloads: + def __init__(self): + self.temp_dir = tempfile.mkdtemp(prefix="crawl4ai_test_") + self.download_dir = os.path.join(self.temp_dir, "downloads") + os.makedirs(self.download_dir, exist_ok=True) + self.results: List[str] = [] + + def cleanup(self): + shutil.rmtree(self.temp_dir) + + def log_result(self, test_name: str, success: bool, message: str = ""): + result = f"{'✅' if success else '❌'} {test_name}: {message}" + self.results.append(result) + print(result) + + async def test_basic_download(self): + """Test basic file download functionality""" + try: + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=self.download_dir, + verbose=True + ) as crawler: + # Python.org downloads page typically has stable download links + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + // Click first download link + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) downloadLink.click(); + """ + ) + + success = result.downloaded_files is not None and len(result.downloaded_files) > 0 + self.log_result( + "Basic Download", + success, + f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded" + ) + except Exception as e: + self.log_result("Basic Download", False, str(e)) + + async def test_persistent_context_download(self): + """Test downloads with persistent context""" + try: + user_data_dir = os.path.join(self.temp_dir, "user_data") + os.makedirs(user_data_dir, exist_ok=True) + + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=self.download_dir, + use_persistent_context=True, + user_data_dir=user_data_dir, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) downloadLink.click(); + """ + ) + + success = result.downloaded_files is not None and len(result.downloaded_files) > 0 + self.log_result( + "Persistent Context Download", + success, + f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded" + ) + except Exception as e: + self.log_result("Persistent Context Download", False, str(e)) + + async def test_multiple_downloads(self): + """Test multiple simultaneous downloads""" + try: + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=self.download_dir, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + // Click multiple download links + const downloadLinks = document.querySelectorAll('a[href$=".exe"]'); + downloadLinks.forEach(link => link.click()); + """ + ) + + success = result.downloaded_files is not None and len(result.downloaded_files) > 1 + self.log_result( + "Multiple Downloads", + success, + f"Downloaded {len(result.downloaded_files or [])} files" if success else "Not enough files downloaded" + ) + except 
Exception as e: + self.log_result("Multiple Downloads", False, str(e)) + + async def test_different_browsers(self): + """Test downloads across different browser types""" + browsers = ["chromium", "firefox", "webkit"] + + for browser_type in browsers: + try: + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path=self.download_dir, + browser_type=browser_type, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code=""" + const downloadLink = document.querySelector('a[href$=".exe"]'); + if (downloadLink) downloadLink.click(); + """ + ) + + success = result.downloaded_files is not None and len(result.downloaded_files) > 0 + self.log_result( + f"{browser_type.title()} Download", + success, + f"Downloaded {len(result.downloaded_files or [])} files" if success else "No files downloaded" + ) + except Exception as e: + self.log_result(f"{browser_type.title()} Download", False, str(e)) + + async def test_edge_cases(self): + """Test various edge cases""" + + # Test 1: Downloads without specifying download path + try: + async with AsyncWebCrawler( + accept_downloads=True, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code="document.querySelector('a[href$=\".exe\"]').click()" + ) + self.log_result( + "Default Download Path", + True, + f"Downloaded to default path: {result.downloaded_files[0] if result.downloaded_files else 'None'}" + ) + except Exception as e: + self.log_result("Default Download Path", False, str(e)) + + # Test 2: Downloads with invalid path + try: + async with AsyncWebCrawler( + accept_downloads=True, + downloads_path="/invalid/path/that/doesnt/exist", + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code="document.querySelector('a[href$=\".exe\"]').click()" + ) + self.log_result("Invalid Download Path", False, "Should have raised an error") + except Exception as e: + self.log_result("Invalid Download Path", True, "Correctly handled invalid path") + + # Test 3: Download with accept_downloads=False + try: + async with AsyncWebCrawler( + accept_downloads=False, + verbose=True + ) as crawler: + result = await crawler.arun( + url="https://www.python.org/downloads/", + js_code="document.querySelector('a[href$=\".exe\"]').click()" + ) + success = result.downloaded_files is None + self.log_result( + "Disabled Downloads", + success, + "Correctly ignored downloads" if success else "Unexpectedly downloaded files" + ) + except Exception as e: + self.log_result("Disabled Downloads", False, str(e)) + + async def run_all_tests(self): + """Run all test cases""" + print("\n🧪 Running Download Tests...\n") + + test_methods = [ + self.test_basic_download, + self.test_persistent_context_download, + self.test_multiple_downloads, + self.test_different_browsers, + self.test_edge_cases + ] + + for test in test_methods: + print(f"\n📝 Running {test.__doc__}...") + await test() + await asyncio.sleep(2) # Brief pause between tests + + print("\n📊 Test Results Summary:") + for result in self.results: + print(result) + + successes = len([r for r in self.results if '✅' in r]) + total = len(self.results) + print(f"\nTotal: {successes}/{total} tests passed") + + self.cleanup() + +async def main(): + tester = TestDownloads() + await tester.run_all_tests() + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/tests/async/test_content_filter.py b/tests/async/test_content_filter.py new 
file mode 100644 index 0000000..a873c41 --- /dev/null +++ b/tests/async/test_content_filter.py @@ -0,0 +1,175 @@ +import os, sys +import pytest +from bs4 import BeautifulSoup +from typing import List + +# Add the parent directory to the Python path +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(parent_dir) + +from crawl4ai.content_filter_strategy import BM25ContentFilter + +@pytest.fixture +def basic_html(): + return """ + + + Test Article + + + + +

      Main Heading

      +
      +

      This is a long paragraph with more than fifty words. It continues with more text to ensure we meet the minimum word count threshold. We need to make sure this paragraph is substantial enough to be considered for extraction according to our filtering rules. This should be enough words now.

      + +
      + + + """ + +@pytest.fixture +def wiki_html(): + return """ + + + Wikipedia Article + + +

      Article Title

      +

      Section 1

      +

      Short but important section header description.

      +
      +

      Long paragraph with sufficient words to meet the minimum threshold. This paragraph continues with more text to ensure we have enough content for proper testing. We need to make sure this has enough words to pass our filters and be considered valid content for extraction purposes.

      +
      + + + """ + +@pytest.fixture +def no_meta_html(): + return """ + + +

      Simple Page

      +

      First paragraph that should be used as fallback for query when no meta tags exist. This text needs to be long enough to serve as a meaningful fallback for our content extraction process.

      + + + """ + +class TestBM25ContentFilter: + def test_basic_extraction(self, basic_html): + """Test basic content extraction functionality""" + filter = BM25ContentFilter() + contents = filter.filter_content(basic_html) + + assert contents, "Should extract content" + assert len(contents) >= 1, "Should extract at least one content block" + assert "long paragraph" in ' '.join(contents).lower() + assert "navigation" not in ' '.join(contents).lower() + + def test_user_query_override(self, basic_html): + """Test that user query overrides metadata extraction""" + user_query = "specific test query" + filter = BM25ContentFilter(user_query=user_query) + + # Access internal state to verify query usage + soup = BeautifulSoup(basic_html, 'lxml') + extracted_query = filter.extract_page_query(soup.find('head')) + + assert extracted_query == user_query + assert "Test description" not in extracted_query + + def test_header_extraction(self, wiki_html): + """Test that headers are properly extracted despite length""" + filter = BM25ContentFilter() + contents = filter.filter_content(wiki_html) + + combined_content = ' '.join(contents).lower() + assert "section 1" in combined_content, "Should include section header" + assert "article title" in combined_content, "Should include main title" + + def test_no_metadata_fallback(self, no_meta_html): + """Test fallback behavior when no metadata is present""" + filter = BM25ContentFilter() + contents = filter.filter_content(no_meta_html) + + assert contents, "Should extract content even without metadata" + assert "First paragraph" in ' '.join(contents), "Should use first paragraph content" + + def test_empty_input(self): + """Test handling of empty input""" + filter = BM25ContentFilter() + assert filter.filter_content("") == [] + assert filter.filter_content(None) == [] + + def test_malformed_html(self): + """Test handling of malformed HTML""" + malformed_html = "

      Unclosed paragraph

      Nested content

      " + filter = BM25ContentFilter() + contents = filter.filter_content(malformed_html) + + assert isinstance(contents, list), "Should return list even with malformed HTML" + + def test_threshold_behavior(self, basic_html): + """Test different BM25 threshold values""" + strict_filter = BM25ContentFilter(bm25_threshold=2.0) + lenient_filter = BM25ContentFilter(bm25_threshold=0.5) + + strict_contents = strict_filter.filter_content(basic_html) + lenient_contents = lenient_filter.filter_content(basic_html) + + assert len(strict_contents) <= len(lenient_contents), \ + "Strict threshold should extract fewer elements" + + def test_html_cleaning(self, basic_html): + """Test HTML cleaning functionality""" + filter = BM25ContentFilter() + contents = filter.filter_content(basic_html) + + cleaned_content = ' '.join(contents) + assert 'class=' not in cleaned_content, "Should remove class attributes" + assert 'style=' not in cleaned_content, "Should remove style attributes" + assert ' +
      {'

      Test content. ' * 1000}

      + + """ + filter = BM25ContentFilter() + contents = filter.filter_content(large_html) + assert contents, "Should handle large content blocks" + + @pytest.mark.parametrize("unwanted_tag", [ + 'script', 'style', 'nav', 'footer', 'header' + ]) + def test_excluded_tags(self, unwanted_tag): + """Test that specific tags are properly excluded""" + html = f""" + + <{unwanted_tag}>Should not appear +

      Should appear

      + + """ + filter = BM25ContentFilter() + contents = filter.filter_content(html) + + combined_content = ' '.join(contents).lower() + assert "should not appear" not in combined_content + + def test_performance(self, basic_html): + """Test performance with timer""" + filter = BM25ContentFilter() + + import time + start = time.perf_counter() + filter.filter_content(basic_html) + duration = time.perf_counter() - start + + assert duration < 1.0, f"Processing took too long: {duration:.2f} seconds" + +if __name__ == "__main__": + pytest.main([__file__]) \ No newline at end of file diff --git a/tests/async/test_content_scraper_strategy.py b/tests/async/test_content_scraper_strategy.py new file mode 100644 index 0000000..62c4914 --- /dev/null +++ b/tests/async/test_content_scraper_strategy.py @@ -0,0 +1,162 @@ +import asyncio +from bs4 import BeautifulSoup +from typing import Dict, Any +import os +import sys +import time +import csv +from tabulate import tabulate +from dataclasses import dataclass +from typing import List, Dict + +parent_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +sys.path.append(parent_dir) +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + +from crawl4ai.content_scraping_strategy import WebScrapingStrategy +from crawl4ai.content_scraping_strategy import WebScrapingStrategy as WebScrapingStrategyCurrent +# from crawl4ai.content_scrapping_strategy_current import WebScrapingStrategy as WebScrapingStrategyCurrent + +@dataclass +class TestResult: + name: str + success: bool + images: int + internal_links: int + external_links: int + markdown_length: int + execution_time: float + +class StrategyTester: + def __init__(self): + self.new_scraper = WebScrapingStrategy() + self.current_scraper = WebScrapingStrategyCurrent() + with open(__location__ + '/sample_wikipedia.html', 'r', encoding='utf-8') as f: + self.WIKI_HTML = f.read() + self.results = {'new': [], 'current': []} + + def run_test(self, name: str, **kwargs) -> tuple[TestResult, TestResult]: + results = [] + for scraper in [self.new_scraper, self.current_scraper]: + start_time = time.time() + result = scraper._get_content_of_website_optimized( + url="https://en.wikipedia.org/wiki/Test", + html=self.WIKI_HTML, + **kwargs + ) + execution_time = time.time() - start_time + + test_result = TestResult( + name=name, + success=result['success'], + images=len(result['media']['images']), + internal_links=len(result['links']['internal']), + external_links=len(result['links']['external']), + markdown_length=len(result['markdown']), + execution_time=execution_time + ) + results.append(test_result) + + return results[0], results[1] # new, current + + def run_all_tests(self): + test_cases = [ + ("Basic Extraction", {}), + ("Exclude Tags", {'excluded_tags': ['table', 'div.infobox', 'div.navbox']}), + ("Word Threshold", {'word_count_threshold': 50}), + ("CSS Selector", {'css_selector': 'div.mw-parser-output > p'}), + ("Link Exclusions", { + 'exclude_external_links': True, + 'exclude_social_media_links': True, + 'exclude_domains': ['facebook.com', 'twitter.com'] + }), + ("Media Handling", { + 'exclude_external_images': True, + 'image_description_min_word_threshold': 20 + }), + ("Text Only", { + 'only_text': True, + 'remove_forms': True + }), + ("HTML Cleaning", { + 'clean_html': True, + 'keep_data_attributes': True + }), + ("HTML2Text Options", { + 'html2text': { + 'skip_internal_links': True, + 'single_line_break': True, + 'mark_code': True, + 'preserve_tags': 
['pre', 'code'] + } + }) + ] + + all_results = [] + for name, kwargs in test_cases: + try: + new_result, current_result = self.run_test(name, **kwargs) + all_results.append((name, new_result, current_result)) + except Exception as e: + print(f"Error in {name}: {str(e)}") + + self.save_results_to_csv(all_results) + self.print_comparison_table(all_results) + + def save_results_to_csv(self, all_results: List[tuple]): + csv_file = os.path.join(__location__, 'strategy_comparison_results.csv') + with open(csv_file, 'w', newline='') as f: + writer = csv.writer(f) + writer.writerow(['Test Name', 'Strategy', 'Success', 'Images', 'Internal Links', + 'External Links', 'Markdown Length', 'Execution Time']) + + for name, new_result, current_result in all_results: + writer.writerow([name, 'New', new_result.success, new_result.images, + new_result.internal_links, new_result.external_links, + new_result.markdown_length, f"{new_result.execution_time:.3f}"]) + writer.writerow([name, 'Current', current_result.success, current_result.images, + current_result.internal_links, current_result.external_links, + current_result.markdown_length, f"{current_result.execution_time:.3f}"]) + + def print_comparison_table(self, all_results: List[tuple]): + table_data = [] + headers = ['Test Name', 'Strategy', 'Success', 'Images', 'Internal Links', + 'External Links', 'Markdown Length', 'Time (s)'] + + for name, new_result, current_result in all_results: + # Check for differences + differences = [] + if new_result.images != current_result.images: differences.append('images') + if new_result.internal_links != current_result.internal_links: differences.append('internal_links') + if new_result.external_links != current_result.external_links: differences.append('external_links') + if new_result.markdown_length != current_result.markdown_length: differences.append('markdown') + + # Add row for new strategy + new_row = [ + name, 'New', new_result.success, new_result.images, + new_result.internal_links, new_result.external_links, + new_result.markdown_length, f"{new_result.execution_time:.3f}" + ] + table_data.append(new_row) + + # Add row for current strategy + current_row = [ + '', 'Current', current_result.success, current_result.images, + current_result.internal_links, current_result.external_links, + current_result.markdown_length, f"{current_result.execution_time:.3f}" + ] + table_data.append(current_row) + + # Add difference summary if any + if differences: + table_data.append(['', '⚠️ Differences', ', '.join(differences), '', '', '', '', '']) + + # Add empty row for better readability + table_data.append([''] * len(headers)) + + print("\nStrategy Comparison Results:") + print(tabulate(table_data, headers=headers, tablefmt='grid')) + +if __name__ == "__main__": + tester = StrategyTester() + tester.run_all_tests() \ No newline at end of file diff --git a/tests/async/test_markdown_genertor.py b/tests/async/test_markdown_genertor.py new file mode 100644 index 0000000..025a031 --- /dev/null +++ b/tests/async/test_markdown_genertor.py @@ -0,0 +1,165 @@ +# ## Issue #236 +# - **Last Updated:** 2024-11-11 01:42:14 +# - **Title:** [user data crawling opens two windows, unable to control correct user browser](https://github.com/unclecode/crawl4ai/issues/236) +# - **State:** open + +import os, sys, time +parent_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +sys.path.append(parent_dir) +__location__ = os.path.realpath( os.path.join(os.getcwd(), os.path.dirname(__file__))) +import asyncio +import os +import time 
+from typing import Dict, Any +from crawl4ai.markdown_generation_strategy import DefaultMarkdownGenerationStrategy + +# Get current directory +__location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))) + +def print_test_result(name: str, result: Dict[str, Any], execution_time: float): + """Helper function to print test results.""" + print(f"\n{'='*20} {name} {'='*20}") + print(f"Execution time: {execution_time:.4f} seconds") + + + # Save markdown to files + for key, content in result.items(): + if isinstance(content, str): + with open(__location__ + f"/output/{name.lower()}_{key}.md", "w") as f: + f.write(content) + + # # Print first few lines of each markdown version + # for key, content in result.items(): + # if isinstance(content, str): + # preview = '\n'.join(content.split('\n')[:3]) + # print(f"\n{key} (first 3 lines):") + # print(preview) + # print(f"Total length: {len(content)} characters") + +def test_basic_markdown_conversion(): + """Test basic markdown conversion with links.""" + with open(__location__ + "/data/wikipedia.html", "r") as f: + cleaned_html = f.read() + + generator = DefaultMarkdownGenerationStrategy() + + start_time = time.perf_counter() + result = generator.generate_markdown( + cleaned_html=cleaned_html, + base_url="https://en.wikipedia.org" + ) + execution_time = time.perf_counter() - start_time + + print_test_result("Basic Markdown Conversion", { + 'raw': result.raw_markdown, + 'with_citations': result.markdown_with_citations, + 'references': result.references_markdown + }, execution_time) + + # Basic assertions + assert result.raw_markdown, "Raw markdown should not be empty" + assert result.markdown_with_citations, "Markdown with citations should not be empty" + assert result.references_markdown, "References should not be empty" + assert "⟨" in result.markdown_with_citations, "Citations should use ⟨⟩ brackets" + assert "## References" in result.references_markdown, "Should contain references section" + +def test_relative_links(): + """Test handling of relative links with base URL.""" + markdown = """ + Here's a [relative link](/wiki/Apple) and an [absolute link](https://example.com). + Also an [image](/images/test.png) and another [page](/wiki/Banana). + """ + + generator = DefaultMarkdownGenerationStrategy() + result = generator.generate_markdown( + cleaned_html=markdown, + base_url="https://en.wikipedia.org" + ) + + assert "https://en.wikipedia.org/wiki/Apple" in result.references_markdown + assert "https://example.com" in result.references_markdown + assert "https://en.wikipedia.org/images/test.png" in result.references_markdown + +def test_duplicate_links(): + """Test handling of duplicate links.""" + markdown = """ + Here's a [link](/test) and another [link](/test) and a [different link](/other). + """ + + generator = DefaultMarkdownGenerationStrategy() + result = generator.generate_markdown( + cleaned_html=markdown, + base_url="https://example.com" + ) + + # Count citations in markdown + citations = result.markdown_with_citations.count("⟨1⟩") + assert citations == 2, "Same link should use same citation number" + +def test_link_descriptions(): + """Test handling of link titles and descriptions.""" + markdown = """ + Here's a [link with title](/test "Test Title") and a [link with description](/other) to test. 
+ """ + + generator = DefaultMarkdownGenerationStrategy() + result = generator.generate_markdown( + cleaned_html=markdown, + base_url="https://example.com" + ) + + assert "Test Title" in result.references_markdown, "Link title should be in references" + assert "link with description" in result.references_markdown, "Link text should be in references" + +def test_performance_large_document(): + """Test performance with large document.""" + with open(__location__ + "/data/wikipedia.md", "r") as f: + markdown = f.read() + + # Test with multiple iterations + iterations = 5 + times = [] + + generator = DefaultMarkdownGenerationStrategy() + + for i in range(iterations): + start_time = time.perf_counter() + result = generator.generate_markdown( + cleaned_html=markdown, + base_url="https://en.wikipedia.org" + ) + end_time = time.perf_counter() + times.append(end_time - start_time) + + avg_time = sum(times) / len(times) + print(f"\n{'='*20} Performance Test {'='*20}") + print(f"Average execution time over {iterations} iterations: {avg_time:.4f} seconds") + print(f"Min time: {min(times):.4f} seconds") + print(f"Max time: {max(times):.4f} seconds") + +def test_image_links(): + """Test handling of image links.""" + markdown = """ + Here's an ![image](/image.png "Image Title") and another ![image](/other.jpg). + And a regular [link](/page). + """ + + generator = DefaultMarkdownGenerationStrategy() + result = generator.generate_markdown( + cleaned_html=markdown, + base_url="https://example.com" + ) + + assert "![" in result.markdown_with_citations, "Image markdown syntax should be preserved" + assert "Image Title" in result.references_markdown, "Image title should be in references" + +if __name__ == "__main__": + print("Running markdown generation strategy tests...") + + test_basic_markdown_conversion() + test_relative_links() + test_duplicate_links() + test_link_descriptions() + test_performance_large_document() + test_image_links() + \ No newline at end of file diff --git a/tests/docker_example.py b/tests/docker_example.py new file mode 100644 index 0000000..658e80f --- /dev/null +++ b/tests/docker_example.py @@ -0,0 +1,332 @@ +import requests +import json +import time +import sys +import base64 +import os +from typing import Dict, Any + +class Crawl4AiTester: + def __init__(self, base_url: str = "http://localhost:11235", api_token: str = None): + self.base_url = base_url + self.api_token = api_token or os.getenv('CRAWL4AI_API_TOKEN') # Check environment variable as fallback + self.headers = {'Authorization': f'Bearer {self.api_token}'} if self.api_token else {} + + def submit_and_wait(self, request_data: Dict[str, Any], timeout: int = 300) -> Dict[str, Any]: + # Submit crawl job + response = requests.post(f"{self.base_url}/crawl", json=request_data, headers=self.headers) + if response.status_code == 403: + raise Exception("API token is invalid or missing") + task_id = response.json()["task_id"] + print(f"Task ID: {task_id}") + + # Poll for result + start_time = time.time() + while True: + if time.time() - start_time > timeout: + raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") + + result = requests.get(f"{self.base_url}/task/{task_id}", headers=self.headers) + status = result.json() + + if status["status"] == "failed": + print("Task failed:", status.get("error")) + raise Exception(f"Task failed: {status.get('error')}") + + if status["status"] == "completed": + return status + + time.sleep(2) + + def submit_sync(self, request_data: Dict[str, Any]) -> Dict[str, Any]: + 
response = requests.post(f"{self.base_url}/crawl_sync", json=request_data, headers=self.headers, timeout=60) + if response.status_code == 408: + raise TimeoutError("Task did not complete within server timeout") + response.raise_for_status() + return response.json() + +def test_docker_deployment(version="basic"): + tester = Crawl4AiTester( + # base_url="http://localhost:11235" , + base_url="https://crawl4ai-sby74.ondigitalocean.app", + api_token="test" + ) + print(f"Testing Crawl4AI Docker {version} version") + + # Health check with timeout and retry + max_retries = 5 + for i in range(max_retries): + try: + health = requests.get(f"{tester.base_url}/health", timeout=10) + print("Health check:", health.json()) + break + except requests.exceptions.RequestException as e: + if i == max_retries - 1: + print(f"Failed to connect after {max_retries} attempts") + sys.exit(1) + print(f"Waiting for service to start (attempt {i+1}/{max_retries})...") + time.sleep(5) + + # Test cases based on version + test_basic_crawl(tester) + test_basic_crawl(tester) + test_basic_crawl_sync(tester) + + # if version in ["full", "transformer"]: + # test_cosine_extraction(tester) + + # test_js_execution(tester) + # test_css_selector(tester) + # test_structured_extraction(tester) + # test_llm_extraction(tester) + # test_llm_with_ollama(tester) + # test_screenshot(tester) + + +def test_basic_crawl(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10, + "session_id": "test" + } + + result = tester.submit_and_wait(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + assert len(result["result"]["markdown"]) > 0 + +def test_basic_crawl_sync(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl (Sync) ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10, + "session_id": "test" + } + + result = tester.submit_sync(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result['status'] == 'completed' + assert result['result']['success'] + assert len(result['result']['markdown']) > 0 + +def test_js_execution(tester: Crawl4AiTester): + print("\n=== Testing JS Execution ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "js_code": [ + "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();" + ], + "wait_for": "article.tease-card:nth-child(10)", + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print(f"JS execution result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_css_selector(tester: Crawl4AiTester): + print("\n=== Testing CSS Selector ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 7, + "css_selector": ".wide-tease-item__description", + "crawler_params": { + "headless": True + }, + "extra": {"word_count_threshold": 10} + + } + + result = tester.submit_and_wait(request) + print(f"CSS selector result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_structured_extraction(tester: Crawl4AiTester): + print("\n=== Testing Structured Extraction ===") + schema = { + "name": "Coinbase Crypto Prices", + "baseSelector": ".cds-tableRow-t45thuk", + "fields": [ + { + "name": "crypto", + "selector": 
"td:nth-child(1) h2", + "type": "text", + }, + { + "name": "symbol", + "selector": "td:nth-child(1) p", + "type": "text", + }, + { + "name": "price", + "selector": "td:nth-child(2)", + "type": "text", + } + ], + } + + request = { + "urls": "https://www.coinbase.com/explore", + "priority": 9, + "extraction_config": { + "type": "json_css", + "params": { + "schema": schema + } + } + } + + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} items") + print("Sample item:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + assert len(extracted) > 0 + +def test_llm_extraction(tester: Crawl4AiTester): + print("\n=== Testing LLM Extraction ===") + schema = { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "description": "Name of the OpenAI model." + }, + "input_fee": { + "type": "string", + "description": "Fee for input token for the OpenAI model." + }, + "output_fee": { + "type": "string", + "description": "Fee for output token for the OpenAI model." + } + }, + "required": ["model_name", "input_fee", "output_fee"] + } + + request = { + "urls": "https://openai.com/api/pricing", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "openai/gpt-4o-mini", + "api_token": os.getenv("OPENAI_API_KEY"), + "schema": schema, + "extraction_type": "schema", + "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""" + } + }, + "crawler_params": {"word_count_threshold": 1} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} model pricing entries") + print("Sample entry:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"LLM extraction test failed (might be due to missing API key): {str(e)}") + +def test_llm_with_ollama(tester: Crawl4AiTester): + print("\n=== Testing LLM with Ollama ===") + schema = { + "type": "object", + "properties": { + "article_title": { + "type": "string", + "description": "The main title of the news article" + }, + "summary": { + "type": "string", + "description": "A brief summary of the article content" + }, + "main_topics": { + "type": "array", + "items": {"type": "string"}, + "description": "Main topics or themes discussed in the article" + } + } + } + + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "ollama/llama2", + "schema": schema, + "extraction_type": "schema", + "instruction": "Extract the main article information including title, summary, and main topics." 
+ } + }, + "extra": {"word_count_threshold": 1}, + "crawler_params": {"verbose": True} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print("Extracted content:", json.dumps(extracted, indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"Ollama extraction test failed: {str(e)}") + +def test_cosine_extraction(tester: Crawl4AiTester): + print("\n=== Testing Cosine Extraction ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "cosine", + "params": { + "semantic_filter": "business finance economy", + "word_count_threshold": 10, + "max_dist": 0.2, + "top_k": 3 + } + } + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} text clusters") + print("First cluster tags:", extracted[0]["tags"]) + assert result["result"]["success"] + except Exception as e: + print(f"Cosine extraction test failed: {str(e)}") + +def test_screenshot(tester: Crawl4AiTester): + print("\n=== Testing Screenshot ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 5, + "screenshot": True, + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print("Screenshot captured:", bool(result["result"]["screenshot"])) + + if result["result"]["screenshot"]: + # Save screenshot + screenshot_data = base64.b64decode(result["result"]["screenshot"]) + with open("test_screenshot.jpg", "wb") as f: + f.write(screenshot_data) + print("Screenshot saved as test_screenshot.jpg") + + assert result["result"]["success"] + +if __name__ == "__main__": + version = sys.argv[1] if len(sys.argv) > 1 else "basic" + # version = "full" + test_docker_deployment(version) \ No newline at end of file diff --git a/tests/test_docker.py b/tests/test_docker.py new file mode 100644 index 0000000..c22acd5 --- /dev/null +++ b/tests/test_docker.py @@ -0,0 +1,300 @@ +import requests +import json +import time +import sys +import base64 +import os +from typing import Dict, Any + +class Crawl4AiTester: + def __init__(self, base_url: str = "http://localhost:11235"): + self.base_url = base_url + + def submit_and_wait(self, request_data: Dict[str, Any], timeout: int = 300) -> Dict[str, Any]: + # Submit crawl job + response = requests.post(f"{self.base_url}/crawl", json=request_data) + task_id = response.json()["task_id"] + print(f"Task ID: {task_id}") + + # Poll for result + start_time = time.time() + while True: + if time.time() - start_time > timeout: + raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds") + + result = requests.get(f"{self.base_url}/task/{task_id}") + status = result.json() + + if status["status"] == "failed": + print("Task failed:", status.get("error")) + raise Exception(f"Task failed: {status.get('error')}") + + if status["status"] == "completed": + return status + + time.sleep(2) + +def test_docker_deployment(version="basic"): + tester = Crawl4AiTester() + print(f"Testing Crawl4AI Docker {version} version") + + # Health check with timeout and retry + max_retries = 5 + for i in range(max_retries): + try: + health = requests.get(f"{tester.base_url}/health", timeout=10) + print("Health check:", health.json()) + break + except requests.exceptions.RequestException as e: + if i == max_retries - 1: + print(f"Failed to connect after {max_retries} attempts") + sys.exit(1) + print(f"Waiting for service to 
start (attempt {i+1}/{max_retries})...") + time.sleep(5) + + # Test cases based on version + test_basic_crawl(tester) + + # if version in ["full", "transformer"]: + # test_cosine_extraction(tester) + + # test_js_execution(tester) + # test_css_selector(tester) + # test_structured_extraction(tester) + # test_llm_extraction(tester) + # test_llm_with_ollama(tester) + # test_screenshot(tester) + + +def test_basic_crawl(tester: Crawl4AiTester): + print("\n=== Testing Basic Crawl ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 10 + } + + result = tester.submit_and_wait(request) + print(f"Basic crawl result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + assert len(result["result"]["markdown"]) > 0 + +def test_js_execution(tester: Crawl4AiTester): + print("\n=== Testing JS Execution ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "js_code": [ + "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();" + ], + "wait_for": "article.tease-card:nth-child(10)", + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print(f"JS execution result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_css_selector(tester: Crawl4AiTester): + print("\n=== Testing CSS Selector ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 7, + "css_selector": ".wide-tease-item__description", + "crawler_params": { + "headless": True + }, + "extra": {"word_count_threshold": 10} + + } + + result = tester.submit_and_wait(request) + print(f"CSS selector result length: {len(result['result']['markdown'])}") + assert result["result"]["success"] + +def test_structured_extraction(tester: Crawl4AiTester): + print("\n=== Testing Structured Extraction ===") + schema = { + "name": "Coinbase Crypto Prices", + "baseSelector": ".cds-tableRow-t45thuk", + "fields": [ + { + "name": "crypto", + "selector": "td:nth-child(1) h2", + "type": "text", + }, + { + "name": "symbol", + "selector": "td:nth-child(1) p", + "type": "text", + }, + { + "name": "price", + "selector": "td:nth-child(2)", + "type": "text", + } + ], + } + + request = { + "urls": "https://www.coinbase.com/explore", + "priority": 9, + "extraction_config": { + "type": "json_css", + "params": { + "schema": schema + } + } + } + + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} items") + print("Sample item:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + assert len(extracted) > 0 + +def test_llm_extraction(tester: Crawl4AiTester): + print("\n=== Testing LLM Extraction ===") + schema = { + "type": "object", + "properties": { + "model_name": { + "type": "string", + "description": "Name of the OpenAI model." + }, + "input_fee": { + "type": "string", + "description": "Fee for input token for the OpenAI model." + }, + "output_fee": { + "type": "string", + "description": "Fee for output token for the OpenAI model." 
+ } + }, + "required": ["model_name", "input_fee", "output_fee"] + } + + request = { + "urls": "https://openai.com/api/pricing", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "openai/gpt-4o-mini", + "api_token": os.getenv("OPENAI_API_KEY"), + "schema": schema, + "extraction_type": "schema", + "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""" + } + }, + "crawler_params": {"word_count_threshold": 1} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} model pricing entries") + print("Sample entry:", json.dumps(extracted[0], indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"LLM extraction test failed (might be due to missing API key): {str(e)}") + +def test_llm_with_ollama(tester: Crawl4AiTester): + print("\n=== Testing LLM with Ollama ===") + schema = { + "type": "object", + "properties": { + "article_title": { + "type": "string", + "description": "The main title of the news article" + }, + "summary": { + "type": "string", + "description": "A brief summary of the article content" + }, + "main_topics": { + "type": "array", + "items": {"type": "string"}, + "description": "Main topics or themes discussed in the article" + } + } + } + + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "llm", + "params": { + "provider": "ollama/llama2", + "schema": schema, + "extraction_type": "schema", + "instruction": "Extract the main article information including title, summary, and main topics." + } + }, + "extra": {"word_count_threshold": 1}, + "crawler_params": {"verbose": True} + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print("Extracted content:", json.dumps(extracted, indent=2)) + assert result["result"]["success"] + except Exception as e: + print(f"Ollama extraction test failed: {str(e)}") + +def test_cosine_extraction(tester: Crawl4AiTester): + print("\n=== Testing Cosine Extraction ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 8, + "extraction_config": { + "type": "cosine", + "params": { + "semantic_filter": "business finance economy", + "word_count_threshold": 10, + "max_dist": 0.2, + "top_k": 3 + } + } + } + + try: + result = tester.submit_and_wait(request) + extracted = json.loads(result["result"]["extracted_content"]) + print(f"Extracted {len(extracted)} text clusters") + print("First cluster tags:", extracted[0]["tags"]) + assert result["result"]["success"] + except Exception as e: + print(f"Cosine extraction test failed: {str(e)}") + +def test_screenshot(tester: Crawl4AiTester): + print("\n=== Testing Screenshot ===") + request = { + "urls": "https://www.nbcnews.com/business", + "priority": 5, + "screenshot": True, + "crawler_params": { + "headless": True + } + } + + result = tester.submit_and_wait(request) + print("Screenshot captured:", bool(result["result"]["screenshot"])) + + if result["result"]["screenshot"]: + # Save screenshot + screenshot_data = base64.b64decode(result["result"]["screenshot"]) + with open("test_screenshot.jpg", "wb") as f: + f.write(screenshot_data) + print("Screenshot saved as test_screenshot.jpg") + + assert result["result"]["success"] + +if __name__ == "__main__": + version = sys.argv[1] if len(sys.argv) > 1 else "basic" + # version = "full" 
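+    # Usage (as implied by the argument parsing above): python tests/test_docker.py [basic|full|transformer]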
+    test_docker_deployment(version)
\ No newline at end of file
diff --git a/tests/test_main.py b/tests/test_main.py
new file mode 100644
index 0000000..19f938c
--- /dev/null
+++ b/tests/test_main.py
@@ -0,0 +1,281 @@
+import asyncio
+import aiohttp
+import json
+import time
+import os
+from typing import Optional, Dict, Any
+from pydantic import BaseModel, HttpUrl
+
+class NBCNewsAPITest:
+    def __init__(self, base_url: str = "http://localhost:8000"):
+        self.base_url = base_url
+        self.session = None
+
+    async def __aenter__(self):
+        self.session = aiohttp.ClientSession()
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.session:
+            await self.session.close()
+
+    async def submit_crawl(self, request_data: Dict[str, Any]) -> str:
+        async with self.session.post(f"{self.base_url}/crawl", json=request_data) as response:
+            result = await response.json()
+            return result["task_id"]
+
+    async def get_task_status(self, task_id: str) -> Dict[str, Any]:
+        async with self.session.get(f"{self.base_url}/task/{task_id}") as response:
+            return await response.json()
+
+    async def wait_for_task(self, task_id: str, timeout: int = 300, poll_interval: int = 2) -> Dict[str, Any]:
+        start_time = time.time()
+        while True:
+            if time.time() - start_time > timeout:
+                raise TimeoutError(f"Task {task_id} did not complete within {timeout} seconds")
+
+            status = await self.get_task_status(task_id)
+            if status["status"] in ["completed", "failed"]:
+                return status
+
+            await asyncio.sleep(poll_interval)
+
+    async def check_health(self) -> Dict[str, Any]:
+        async with self.session.get(f"{self.base_url}/health") as response:
+            return await response.json()
+
+async def test_basic_crawl():
+    print("\n=== Testing Basic Crawl ===")
+    async with NBCNewsAPITest() as api:
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 10
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        print(f"Basic crawl result length: {len(result['result']['markdown'])}")
+        assert result["status"] == "completed"
+        assert "result" in result
+        assert result["result"]["success"]
+
+async def test_js_execution():
+    print("\n=== Testing JS Execution ===")
+    async with NBCNewsAPITest() as api:
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 8,
+            "js_code": [
+                "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();"
+            ],
+            "wait_for": "article.tease-card:nth-child(10)",
+            "crawler_params": {
+                "headless": True
+            }
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        print(f"JS execution result length: {len(result['result']['markdown'])}")
+        assert result["status"] == "completed"
+        assert result["result"]["success"]
+
+async def test_css_selector():
+    print("\n=== Testing CSS Selector ===")
+    async with NBCNewsAPITest() as api:
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 7,
+            "css_selector": ".wide-tease-item__description"
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        print(f"CSS selector result length: {len(result['result']['markdown'])}")
+        assert result["status"] == "completed"
+        assert result["result"]["success"]
+
+async def test_structured_extraction():
+    print("\n=== Testing Structured Extraction ===")
+    async with NBCNewsAPITest() as api:
+        schema = {
+            "name": "NBC News Articles",
+            "baseSelector": "article.tease-card",
+            "fields": [
+                {
+                    "name": "title",
+                    "selector": "h2",
+                    "type": "text"
+                },
+                {
+                    "name": "description",
+                    "selector": ".tease-card__description",
+                    "type": "text"
+                },
+                {
+                    "name": "link",
+                    "selector": "a",
+                    "type": "attribute",
+                    "attribute": "href"
+                }
+            ]
+        }
+
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 9,
+            "extraction_config": {
+                "type": "json_css",
+                "params": {
+                    "schema": schema
+                }
+            }
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        extracted = json.loads(result["result"]["extracted_content"])
+        print(f"Extracted {len(extracted)} articles")
+        assert result["status"] == "completed"
+        assert result["result"]["success"]
+        assert len(extracted) > 0
+
+async def test_batch_crawl():
+    print("\n=== Testing Batch Crawl ===")
+    async with NBCNewsAPITest() as api:
+        request = {
+            "urls": [
+                "https://www.nbcnews.com/business",
+                "https://www.nbcnews.com/business/consumer",
+                "https://www.nbcnews.com/business/economy"
+            ],
+            "priority": 6,
+            "crawler_params": {
+                "headless": True
+            }
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        print(f"Batch crawl completed, got {len(result['results'])} results")
+        assert result["status"] == "completed"
+        assert "results" in result
+        assert len(result["results"]) == 3
+
+async def test_llm_extraction():
+    print("\n=== Testing LLM Extraction with Ollama ===")
+    async with NBCNewsAPITest() as api:
+        schema = {
+            "type": "object",
+            "properties": {
+                "article_title": {
+                    "type": "string",
+                    "description": "The main title of the news article"
+                },
+                "summary": {
+                    "type": "string",
+                    "description": "A brief summary of the article content"
+                },
+                "main_topics": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Main topics or themes discussed in the article"
+                }
+            },
+            "required": ["article_title", "summary", "main_topics"]
+        }
+
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 8,
+            "extraction_config": {
+                "type": "llm",
+                "params": {
+                    "provider": "openai/gpt-4o-mini",
+                    "api_key": os.getenv("OLLAMA_API_KEY"),
+                    "schema": schema,
+                    "extraction_type": "schema",
+                    "instruction": """Extract the main article information including title, a brief summary, and main topics discussed.
+                    Focus on the primary business news article on the page."""
+                }
+            },
+            "crawler_params": {
+                "headless": True,
+                "word_count_threshold": 1
+            }
+        }
+
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+
+        if result["status"] == "completed":
+            extracted = json.loads(result["result"]["extracted_content"])
+            print(f"Extracted article analysis:")
+            print(json.dumps(extracted, indent=2))
+
+        assert result["status"] == "completed"
+        assert result["result"]["success"]
+
+async def test_screenshot():
+    print("\n=== Testing Screenshot ===")
+    async with NBCNewsAPITest() as api:
+        request = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 5,
+            "screenshot": True,
+            "crawler_params": {
+                "headless": True
+            }
+        }
+        task_id = await api.submit_crawl(request)
+        result = await api.wait_for_task(task_id)
+        print("Screenshot captured:", bool(result["result"]["screenshot"]))
+        assert result["status"] == "completed"
+        assert result["result"]["success"]
+        assert result["result"]["screenshot"] is not None
+
+async def test_priority_handling():
+    print("\n=== Testing Priority Handling ===")
+    async with NBCNewsAPITest() as api:
+        # Submit low priority task first
+        low_priority = {
+            "urls": "https://www.nbcnews.com/business",
+            "priority": 1,
+            "crawler_params": {"headless": True}
+        }
+        low_task_id = await api.submit_crawl(low_priority)
+
+        # Submit high priority task
+        high_priority = {
+            "urls": "https://www.nbcnews.com/business/consumer",
+            "priority": 10,
+            "crawler_params": {"headless": True}
+        }
+        high_task_id = await api.submit_crawl(high_priority)
+
+        # Get both results
+        high_result = await api.wait_for_task(high_task_id)
+        low_result = await api.wait_for_task(low_task_id)
+
+        print("Both tasks completed")
+        assert high_result["status"] == "completed"
+        assert low_result["status"] == "completed"
+
+async def main():
+    try:
+        # Start with health check
+        async with NBCNewsAPITest() as api:
+            health = await api.check_health()
+            print("Server health:", health)
+
+        # Run all tests
+        # await test_basic_crawl()
+        # await test_js_execution()
+        # await test_css_selector()
+        # await test_structured_extraction()
+        await test_llm_extraction()
+        # await test_batch_crawl()
+        # await test_screenshot()
+        # await test_priority_handling()
+
+    except Exception as e:
+        print(f"Test failed: {str(e)}")
+        raise
+
+if __name__ == "__main__":
+    asyncio.run(main())
\ No newline at end of file