From d9cc16963ff66e71b431707c236770554dd3d510 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Thu, 11 Jan 2024 13:27:33 +0100
Subject: [PATCH 01/11] improve output & add check for runtime logs

* privoxy-blocklist: remove unnecessary newline
* privoxy-blocklist: improve debug() function
* tests: add check for errors in privoxy runtime log

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 privoxy-blocklist.sh          | 86 +++++++++++++++++++----------------
 tests/conftest.py             | 26 ++++++-----
 tests/test_01_root_execute.py |  4 ++
 3 files changed, 64 insertions(+), 52 deletions(-)

diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 8fd2b7b..314ad28 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -69,7 +69,7 @@ function get_config_path() {
 
 function prepare() {
     if [ ${UID} -ne 0 ]; then
-        error -e "Root privileges needed. Exit.\n"
+        error "Root privileges needed. Exit."
         usage
         exit 1
     fi
@@ -82,8 +82,6 @@ function prepare() {
         fi
     done
 
-    OS="$(uname)"
-
     if [ -z "${SCRIPTCONF:-}" ]; then
         get_config_path
     fi
@@ -128,7 +126,7 @@ EOF
     fi
 
     if [[ ! -r "${SCRIPTCONF}" ]]; then
-        debug "Can't read ${SCRIPTCONF}. Permission denied." -1
+        debug -1 "Can't read ${SCRIPTCONF}. Permission denied."
     fi
 
     # shellcheck disable=SC1090
@@ -157,8 +155,7 @@ EOF
                 PRIVOXY_CONF="/etc/privoxy/config"
                 ;;
         esac
-        PRIVOXY_CONF="/etc/privoxy/config"
-        info "\$PRIVOXY_CONF isn't set, falling back to '/etc/privoxy/config'"
+        info "\$PRIVOXY_CONF isn't set, falling back to '${PRIVOXY_CONF}'"
     fi
     if [[ -z "${PRIVOXY_USER:-}" ]]; then
         PRIVOXY_USER="privoxy"
@@ -174,8 +171,14 @@ EOF
 }
 
 function debug() {
-    if [ "${DBG}" -ge "${2}" ]; then
-        echo -e "${1}"
+    local expected_level="${1}"
+    shift 1
+    if [ "${DBG}" -ge "${expected_level}" ]; then
+        if [ "${expected_level}" -eq 0 ]; then
+            info "${@}"
+        else
+            printf '%s\n' "${@}"
+        fi
     fi
 }
 
@@ -190,7 +193,7 @@ function info() {
 # shellcheck disable=SC2317
 function main() {
     for url in "${URLS[@]}"; do
-        debug "Processing ${url} ...\n" 0
+        debug 0 "Processing ${url} ..."
         file="${TMPDIR}/$(basename "${url}")"
         address_file="${TMPDIR}/$(basename "${url}").address"
         address_except_file="${TMPDIR}/$(basename "${url}").address_except"
@@ -205,10 +208,10 @@ function main() {
         list="$(basename "${file%\.*}")"
 
         # download list
-        debug "Downloading ${url} ..." 0
+        debug 0 "Downloading ${url} ..."
         wget -t 3 --no-check-certificate -O "${file}" "${url}" > "${TMPDIR}/wget-${url//\//#}.log" 2>&1
-        debug "$(cat "${TMPDIR}/wget-${url//\//#}.log")" 2
-        debug ".. downloading done." 0
+        debug 2 "$(cat "${TMPDIR}/wget-${url//\//#}.log")"
+        debug 0 ".. downloading done."
         if ! grep -qE '^.*\[Adblock.*\].*$' "${file}"; then
             info "The list recieved from ${url} does not contain AdblockPlus list header. Try to process anyway."
         fi
@@ -233,76 +236,76 @@ function main() {
 
         # convert AdblockPlus list to Privoxy list
         # blacklist of urls
-        debug "Creating actionfile for ${list} ..." 1
+        debug 1 "Creating actionfile for ${list} ..."
         echo -e "{ +block{${list}} }" > "${actionfile}"
         sed '/\$.*/d;/#/d;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^$//g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${domain_name_file}" >> "${actionfile}"
 
-        debug "... creating filterfile for ${list} ..." 1
+        debug 1 "... creating filterfile for ${list} ..."
         echo "FILTER: ${list} Tag filter of ${list}" > "${filterfile}"
         # set filter for html elements
         sed '/^#/!d;s/^##//g;s/^#\(.*\)\[.*\]\[.*\]*/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^#\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^\.\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*class=.?\1.*>.*<\/\\1>@@g/g;s/^a\[\(.*\)\]/s@<a.*\1.*>.*<\/a>@@g/g;s/^\([a-zA-Z0-9]*\)\.\(.*\)\[.*\]\[.*\]*/s@<\1.*class=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\[\([a-zA-Z]*\).=\(.*\)\]/s@\1^=\2>@@g/g;s/\^/[\/\&:\?=_]/g;s/\.\([a-zA-Z0-9]\)/\\.\1/g' "${file}" >> "${filterfile}"
-        debug "... filterfile created - adding filterfile to actionfile ..." 1
+        debug 1 "... filterfile created - adding filterfile to actionfile ..."
         echo "{ +filter{${list}} }" >> "${actionfile}"
         echo "*" >> "${actionfile}"
-        debug "... filterfile added ..." 1
+        debug 1 "... filterfile added ..."
 
         # create domain based whitelist
 
         # create domain based blacklist
         #    domains=$(sed '/^#/d;/#/!d;s/,~/,\*/g;s/~/;:\*/g;s/^\([a-zA-Z]\)/;:\1/g' ${file})
-        #    [ -n "${domains}" ] && debug "... creating domainbased filterfiles ..." 1
-        #    debug "Found Domains: ${domains}." 2
+        #    [ -n "${domains}" ] && debug 1 "... creating domainbased filterfiles ..."
+        #    debug 2 "Found Domains: ${domains}."
         #    ifs=$IFS
         #    IFS=";:"
         #    for domain in ${domains}
         #    do
         #      dns=$(echo ${domain} | awk -F ',' '{print $1}' | awk -F '#' '{print $1}')
-        #      debug "Modifying line: ${domain}" 2
-        #      debug "   ... creating filterfile for ${dns} ..." 1
+        #      debug 2 "Modifying line: ${domain}"
+        #      debug 1 "   ... creating filterfile for ${dns} ..."
         #      sed '' ${file} > ${file%\.*}-${dns%~}.script.filter
-        #      debug "   ... filterfile created ..." 1
-        #      debug "   ... adding filterfile for ${dns} to actionfile ..." 1
+        #      debug 1 "   ... filterfile created ..."
+        #      debug 1 "   ... adding filterfile for ${dns} to actionfile ..."
         #      echo "{ +filter{${list}-${dns}} }" >> ${actionfile}
         #      echo "${dns}" >> ${actionfile}
-        #      debug "   ... filterfile added ..." 1
+        #      debug 1 "   ... filterfile added ..."
         #    done
         #    IFS=${ifs}
-        #    debug "... all domainbased filterfiles created ..." 1
+        #    debug 1 "... all domainbased filterfiles created ..."
 
-        debug "... creating and adding whitlist for urls ..." 1
+        debug 1 "... creating and adding whitlist for urls ..."
         # whitelist of urls
         echo "{ -block }" >> "${actionfile}"
         sed 's/^@@//g;/\$.*/d;/#/d;s/\./\\./g;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^/[\/\&:\?=_]/g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${domain_name_except_file}" >> "${actionfile}"
-        debug "... created and added whitelist - creating and adding image handler ..." 1
+        debug 1 "... created and added whitelist - creating and adding image handler ..."
         # whitelist of image urls
         echo "{ -block +handle-as-image }" >> "${actionfile}"
         sed '/^@@.*/!d;s/^@@//g;/\$.*image.*/!d;s/\$.*image.*//g;/#/d;s/\./\\./g;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^/[\/\&:\?=_]/g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${file}" >> "${actionfile}"
-        debug "... created and added image handler ..." 1
-        debug "... created actionfile for ${list}." 1
+        debug 1 "... created and added image handler ..."
+        debug 1 "... created actionfile for ${list}."
 
         # install Privoxy actionsfile
         install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${actionfile}" "${PRIVOXY_DIR}"
         if ! grep -q "$(basename "${actionfile}")" "${PRIVOXY_CONF}"; then
-            debug "\nModifying ${PRIVOXY_CONF} ..." 0
+            debug 0 "Modifying ${PRIVOXY_CONF} ..."
             sed "s/^actionsfile user\.action/actionsfile $(basename "${actionfile}")\nactionsfile user.action/" "${PRIVOXY_CONF}" > "${TMPDIR}/config"
-            debug "... modification done.\n" 0
-            debug "Installing new config ..." 0
+            debug 0 "... modification done."
+            debug 0 "Installing new config ..."
             install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${TMPDIR}/config" "${PRIVOXY_CONF}"
-            debug "... installation done\n" 0
+            debug 0 "... installation done"
         fi
 
         # install Privoxy filterfile
         install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${filterfile}" "${PRIVOXY_DIR}"
         if ! grep -q "$(basename "${filterfile}")" "${PRIVOXY_CONF}"; then
-            debug "\nModifying ${PRIVOXY_CONF} ..." 0
+            debug 0 "Modifying ${PRIVOXY_CONF} ..."
             sed "s/^\(#*\)filterfile user\.filter/filterfile $(basename "${filterfile}")\n\1filterfile user.filter/" "${PRIVOXY_CONF}" > "${TMPDIR}/config"
-            debug "... modification done.\n" 0
-            debug "Installing new config ..." 0
+            debug 0 "... modification done."
+            debug 0 "Installing new config ..."
             install -o "${PRIVOXY_USER}" -g "${PRIVOXY_GROUP}" "${VERBOSE[@]}" "${TMPDIR}/config" "${PRIVOXY_CONF}"
-            debug "... installation done\n" 0
+            debug 0 "... installation done"
         fi
 
-        debug "... ${url} installed successfully.\n" 0
+        debug 0 "... ${url} installed successfully."
     done
 }
 
@@ -319,9 +322,9 @@ function lock() {
             echo "An instance of ${TMPNAME} is already running. Exit"
             exit 1
         fi
-        debug "Found dead lock file." 0
+        debug 0 "Found dead lock file."
         rm -f "${PID_FILE}"
-        debug "File removed." 0
+        debug 0 "File removed."
     fi
 
     # safe PID in lock-file
@@ -346,6 +349,7 @@ function remove() {
 
 VERBOSE=()
 method="main"
+OS="$(uname)"
 
 # loop for options
 while getopts ":c:hrqv:V" opt; do
@@ -384,7 +388,9 @@ prepare
 trap 'rm -fr "${TMPDIR}";exit' INT TERM EXIT
 
 lock
-debug "URL-List: ${URLS}\nPrivoxy-Configdir: ${PRIVOXY_DIR}\nTemporary directory: ${TMPDIR}" 2
+debug 2 "URL-List: ${URLS[*]}"
+debug 2 "Privoxy-Configdir: ${PRIVOXY_DIR}"
+debug 2 "Temporary directory: ${TMPDIR}"
 "${method}"
 
 # restore default exit command
diff --git a/tests/conftest.py b/tests/conftest.py
index e6d037f..232f0f2 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -4,14 +4,14 @@
 import os
 from pathlib import Path
 from re import search
-from typing import Dict, Generator, Optional, cast
+from typing import Generator, Optional
 
 import pytest
 import requests
-from pytest import CollectReport, StashKey
+from pytest import StashKey
 from pytestshellutils.shell import Daemon
 
-phase_report_key = StashKey[Dict[str, CollectReport]]()
+phase_report_key = StashKey[int]()
 
 
 def debug_enabled() -> bool:
@@ -35,12 +35,12 @@ def pytest_runtest_makereport(item: pytest.Item):
     report = yield
 
     if item.parent:
+        # store test results for each phase ("setup", "call", "teardown") of each test
+        # within module-scope
+        if phase_report_key not in item.parent.stash:
+            item.parent.stash.setdefault(phase_report_key, 0)
         if report.failed:
-            # store test results for each phase ("setup", "call", "teardown") of each test
-            # within module-scope
-            item.parent.stash.setdefault(
-                phase_report_key, cast(Dict[str, CollectReport], {})
-            )[f"{report.nodeid}_{report.when}"] = report
+            item.parent.stash[phase_report_key] += 1
 
     return report
 
@@ -95,12 +95,14 @@ def start_privoxy(request: pytest.FixtureRequest) -> Generator[bool, None, None]
     run.start()
     yield run.is_running()
     run_result = run.terminate()
+    logs = run_result.stdout + run_result.stderr
     # request.node is an "module" because we use the "module" scope
     node = request.node
-    if (phase_report_key in node.stash) and len(node.stash[phase_report_key]) > 0:
-        print(
-            f"\n\nprivoxy-results\n  stdout:\n{run_result.stdout}\n  stderr:\n{run_result.stderr}"
-        )
+    if (
+        (phase_report_key in node.stash) and node.stash[phase_report_key] > 0
+    ) or " Error: " in logs:
+        print(f"\n\nprivoxy-logs\n{logs}")
+    assert " Error: " not in logs
 
 
 @pytest.fixture(scope="module")
diff --git a/tests/test_01_root_execute.py b/tests/test_01_root_execute.py
index fa722fe..ab5805f 100644
--- a/tests/test_01_root_execute.py
+++ b/tests/test_01_root_execute.py
@@ -150,6 +150,10 @@ def test_missing_deps(shell, privoxy_blocklist) -> None:
     assert "Please install the package providing" in ret_script.stderr
 
 
+def test_privoxy_runtime_log() -> None:
+    """NOOP function to support checking privoxy logs during tear-down."""
+
+
 # Heloer functions
 
 

From f5398bb686c08885608054ab203fedde00d9411f Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Sun, 14 Jan 2024 23:09:47 +0100
Subject: [PATCH 02/11] improve readability of sed-commands

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 privoxy-blocklist.sh | 99 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 86 insertions(+), 13 deletions(-)

diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 314ad28..992e139 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -195,14 +195,15 @@ function main() {
     for url in "${URLS[@]}"; do
         debug 0 "Processing ${url} ..."
         file="${TMPDIR}/$(basename "${url}")"
-        address_file="${TMPDIR}/$(basename "${url}").address"
-        address_except_file="${TMPDIR}/$(basename "${url}").address_except"
-        url_file="${TMPDIR}/$(basename "${url}").url"
-        url_except_file="${TMPDIR}/$(basename "${url}").url_except"
-        domain_name_file="${TMPDIR}/$(basename "${url}").domain"
-        domain_name_except_file="${TMPDIR}/$(basename "${url}").domain_except"
-        regex_file="${TMPDIR}/$(basename "${url}").regex"
-        regex_except_file="${TMPDIR}/$(basename "${url}").regex_except"
+        address_file="${file}.address"
+        address_except_file="${file}.address_except"
+        url_file="${file}.url"
+        url_except_file="${file}.url_except"
+        domain_name_file="${file}.domain"
+        domain_name_except_file="${file}.domain_except"
+        regex_file="${file}.regex"
+        regex_except_file="${file}.regex_except"
+        html_file="${file}.html"
         actionfile=${file%\.*}.script.action
         filterfile=${file%\.*}.script.filter
         list="$(basename "${file%\.*}")"
@@ -232,21 +233,93 @@ function main() {
         ## regex block
         grep '^/^' "${file}" > "${regex_file}"
         grep '^@@/^' "${file}" > "${regex_except_file}"
+        ## html element block
+        grep '^.*##..*' "${file}" > "${html_file}"
         set -e
 
         # convert AdblockPlus list to Privoxy list
         # blacklist of urls
         debug 1 "Creating actionfile for ${list} ..."
-        echo -e "{ +block{${list}} }" > "${actionfile}"
-        sed '/\$.*/d;/#/d;s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g;s/\^$//g;s/^||/\./g;s/^|/^/g;s/|$/\$/g;/|/d' "${domain_name_file}" >> "${actionfile}"
+        echo "{ +block{${list}} }" > "${actionfile}"
+        sed '
+        # skip domains with additional filter definition
+        /\$.*/d
+        # skip domains with HTML filter
+        /#/d
+        # replace characters to match Privoxy domain syntax
+        s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g
+        # replace marking seperator of Adblock
+        s/\^$//g
+        # replace domain matcher
+        s/^||/\./g
+        ' "${domain_name_file}" >> "${actionfile}"
+        sed '
+        # skip domains with additional filter definition
+        /\$.*/d
+        # skip domains with HTML filter
+        /#/d
+        # replace characters to match Privoxy domain syntax
+        s/\?/\\?/g;s/\*/.*/g;s/(/\\(/g;s/)/\\)/g;s/\[/\\[/g;s/\]/\\]/g
+        # replace marking seperator of Adblock
+        s/\^$//g
+        # handle exact domain matching
+        s/^|\([^|][^|]*\)|/^\1\$/g;s/|$/\$/g
+        ' "${address_file}" >> "${actionfile}"
 
         debug 1 "... creating filterfile for ${list} ..."
         echo "FILTER: ${list} Tag filter of ${list}" > "${filterfile}"
-        # set filter for html elements
-        sed '/^#/!d;s/^##//g;s/^#\(.*\)\[.*\]\[.*\]*/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^#\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*id=.?\1.*>.*<\/\\1>@@g/g;s/^\.\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*class=.?\1.*>.*<\/\\1>@@g/g;s/^a\[\(.*\)\]/s@<a.*\1.*>.*<\/a>@@g/g;s/^\([a-zA-Z0-9]*\)\.\(.*\)\[.*\]\[.*\]*/s@<\1.*class=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\([a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g;s/^\[\([a-zA-Z]*\).=\(.*\)\]/s@\1^=\2>@@g/g;s/\^/[\/\&:\?=_]/g;s/\.\([a-zA-Z0-9]\)/\\.\1/g' "${file}" >> "${filterfile}"
+        debug 1 "... processing 'class'-matches ..."
+        sed '
+        # only process gloabl classes
+        /^##\..*/!d
+        # cleanup
+        s/^##//g
+        # convert classes independent of HTML tag
+        s/^\.\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*class=.?\1.*>.*<\/\\1>@@g/g
+        # convert classes with defined HTML tag
+        s/^\([a-zA-Z0-9]*\)\.\(.*\)\[.*\]\[.*\]*/s@<\1.*class=.?\2.*>.*<\/\1>@@g/g
+        ' "${html_file}" >> "${filterfile}"
+        # FIXME: add class handling with domains
+
+        debug 1 "... processing 'id'-matches ..."
+        sed '
+        # only process gloabl classes
+        /^###.*/!d
+        # cleanup
+        s/^##//g
+        # convert id independent of HTML tag
+        s/^#\(.*\)/s@<.*id=.?\1.*>.*<\/@@g/g
+        # convert id with defined HTML tag and extended selectors
+        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
+        # convert id with defined HTML tag
+        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
+        ' "${html_file}" >> "${filterfile}"
+        # FIXME: add id handling with domains
+
+        debug 1 "... processing 'attribute'-matches ..."
+        sed '
+        # only process gloabl classes
+        /^##\[.*/!d
+        # cleanup
+        s/^##//g
+        # convert attribute based filters with exact match with exact match
+        s/^\[\([^=^]*\)"*=\(.*\)\]/s@\1=\2>@@g/g
+        # convert attribute based filter with contain match
+        s/^\[\([^=^]*\)"*\*="*\([^"]*\)"*\]/s@\1=".*\2.*">@@g/g
+        # convert attribute based filter with startwith match
+        s/^\[\([^=]*\)"*^="*\([^"]*\)"*\]/s@\1="\2.*">@@g/g
+        # convert attribute based filter with endswith match
+        s/^\[\([^=^]*\)"*\$="*\([^"]*\)"*\]/s@\1=".*\2">@@g/g
+        # convert attribute name-only matches
+        s/^\[\(.*\)"*\]/s@<.*\1.*\/>@@g\ns@<\([^ ]*\) .*\1.*>.*<\/\\1.*>@@g/g
+        # convert dots
+        s/\.\([a-zA-Z0-9]\)/\\.\1/g
+        ' "${html_file}" >> "${filterfile}"
+        # FIXME: add attribute handling with domains
+
         debug 1 "... filterfile created - adding filterfile to actionfile ..."
         echo "{ +filter{${list}} }" >> "${actionfile}"
-        echo "*" >> "${actionfile}"
+        echo ".*" >> "${actionfile}"
         debug 1 "... filterfile added ..."
 
         # create domain based whitelist

From ea42f8ab7d7cf31c3f5ed08dac1c5fdca605aaec Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Mon, 15 Jan 2024 23:44:00 +0100
Subject: [PATCH 03/11] implement content tests & move to setup.cfg

* tests: implement logic to check for content removed by privoxy
* tests: centralize test configuration in config.py
* tests: move config for python linting tools to centralized setup.cfg

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 .flake8                       |   2 -
 tests/config.py               |  69 ++++++++++++++++++
 tests/conftest.py             |  10 +++
 tests/requirements.txt        |   1 +
 tests/response.html           |   9 +++
 tests/setup.cfg               |   5 ++
 tests/test_00_minimal.py      |   2 +-
 tests/test_01_root_execute.py | 129 ++++++++++++++--------------------
 8 files changed, 147 insertions(+), 80 deletions(-)
 delete mode 100644 .flake8
 create mode 100644 tests/config.py
 create mode 100644 tests/response.html
 create mode 100644 tests/setup.cfg

diff --git a/.flake8 b/.flake8
deleted file mode 100644
index 61d9081..0000000
--- a/.flake8
+++ /dev/null
@@ -1,2 +0,0 @@
-[flake8]
-max-line-length = 99
diff --git a/tests/config.py b/tests/config.py
new file mode 100644
index 0000000..83f1bad
--- /dev/null
+++ b/tests/config.py
@@ -0,0 +1,69 @@
+"""Configuration of test suite to configure tests."""
+
+from conftest import check_in, check_not_in
+
+content_removed = [
+    "ad_970x250",  # class match: https://www.iphoneitalia.com/
+    "MyAdsId3",  # id match
+    "AdRight2",  # class match with element having multiple classes
+]
+content_exists = [
+    "ajlkl",  # should exist, although one element is removed by privoxy
+]
+
+# FIXME: see https://github.com/Andrwe/privoxy-blocklist/issues/35
+urls_allowed = ["duckduckgo.com/", "hs-exp.jp/ads/"]
+urls_allowed = ["duckduckgo.com/"]
+
+# FIXME: implement regex-filter for domains, e.g.
+#   /^https?:\/\/s3\.*.*\.amazonaws\.com\/[a-f0-9]{45,}\/[a-f,0-9]{8,10}$/$script,
+#       third-party,xmlhttprequest,domain=~amazon.com
+urls_blocked = [
+    "andrwe.org/ads/",
+    "andrwe.jp/ads/",
+    "pubfeed.linkby.com",
+    f"s3.{'a'*6}.amazonaws.com/{'0123abcd'*6}/{'ab,12'*2}/",
+]
+urls_blocked = ["andrwe.org/ads/", "andrwe.jp/ads/", "pubfeed.linkby.com"]
+
+config_checks = {
+    "url_extended_config.conf": [
+        (
+            check_in,
+            "Processing https://raw.githubusercontent.com/easylist/easylist/master/"
+            "easylist/easylist_allowlist_general_hide.txt",
+        ),
+        (
+            check_in,
+            "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt",
+        ),
+        (
+            check_in,
+            "The list recieved from https://raw.githubusercontent.com/easylist/easylist/master"
+            "/easylist/easylist_allowlist_general_hide.txt does not contain AdblockPlus list "
+            "header. Try to process anyway.",
+        ),
+        (
+            check_not_in,
+            "created and added image handler",
+        ),
+    ],
+    "debugging.conf": [
+        (
+            check_in,
+            "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt",
+        ),
+        (
+            check_not_in,
+            "does not contain AdblockPlus list header.",
+        ),
+        (
+            check_in,
+            "‘/tmp/privoxy-blocklist.sh/easylist.txt’ saved",
+        ),
+        (
+            check_in,
+            "created and added image handler",
+        ),
+    ],
+}
diff --git a/tests/conftest.py b/tests/conftest.py
index 232f0f2..be9238d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -26,6 +26,16 @@ def debug_enabled() -> bool:
     )
 
 
+def check_in(needle: str, haystack: str) -> bool:
+    """Check given haystack for given string."""
+    return needle in haystack
+
+
+def check_not_in(needle: str, haystack: str) -> bool:
+    """Check that given string is not in given text."""
+    return needle not in haystack
+
+
 # based on
 # https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures
 @pytest.hookimpl(wrapper=True, tryfirst=True)
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 295a104..d9def25 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,4 +1,5 @@
 pytest
 pytest-durations
+pytest-httpserver
 pytest-shell-utilities
 requests
diff --git a/tests/response.html b/tests/response.html
new file mode 100644
index 0000000..196a7b7
--- /dev/null
+++ b/tests/response.html
@@ -0,0 +1,9 @@
+<html>
+	<body>
+		<div>just-some-test-string-always-present</div>
+		<div class="ad_970x250">single class should be removed</div>
+		<div class="asd ajlkl AdRight2">multiple classes that should be removed</div>
+		<div class="asd ajlkl"> multiple classes that should exist </div>
+		<div id="MyAdsId3">id should be removed</div>
+	</body>
+</html>
diff --git a/tests/setup.cfg b/tests/setup.cfg
new file mode 100644
index 0000000..035f93b
--- /dev/null
+++ b/tests/setup.cfg
@@ -0,0 +1,5 @@
+[pycodestyle]
+max-line-length = 99
+
+[flake8]
+max-line-length = 99
diff --git a/tests/test_00_minimal.py b/tests/test_00_minimal.py
index e86afff..0402a8a 100644
--- a/tests/test_00_minimal.py
+++ b/tests/test_00_minimal.py
@@ -11,7 +11,6 @@ def test_permissions() -> None:
         ".ci_config/bandit.yml",
         ".ci_config/prospector.yaml",
         ".editorconfig",
-        ".flake8",
         ".github/release.yml",
         ".github/workflows/pytest.yml",
         ".github/workflows/release.yml",
@@ -22,6 +21,7 @@ def test_permissions() -> None:
         "tests/Dockerfile_alpine",
         "tests/Dockerfile_ubuntu",
         "tests/requirements.txt",
+        "tests/setup.cfg",
         "tests/test_00_minimal.py",
         "tests/test_01_root_execute.py",
     ]
diff --git a/tests/test_01_root_execute.py b/tests/test_01_root_execute.py
index ab5805f..b0dbffa 100644
--- a/tests/test_01_root_execute.py
+++ b/tests/test_01_root_execute.py
@@ -4,29 +4,32 @@
 from pathlib import Path
 from shutil import copyfile, copymode, which
 
+import config
 import requests
+from conftest import check_in, check_not_in
+from urllib3.util import parse_url
 
 
 def test_config_generator(shell, privoxy_blocklist) -> None:
     """Test config generator with default path."""
-    config = Path("/etc/privoxy-blocklist.conf")
-    if config.exists():
-        config.unlink()
+    config_file = Path("/etc/privoxy-blocklist.conf")
+    if config_file.exists():
+        config_file.unlink()
     ret = shell.run(privoxy_blocklist)
     assert ret.returncode == 2
     assert "Creating default one and exiting" in ret.stdout
-    assert config.exists()
+    assert config_file.exists()
 
 
 def test_custom_config_generator(shell, tmp_path, privoxy_blocklist) -> None:
     """Test config generator with custom path."""
-    config = Path(f"{tmp_path}/privoxy-blocklist")
-    if config.exists():
-        config.unlink()
-    ret = shell.run(privoxy_blocklist, "-c", str(config))
+    config_file = Path(f"{tmp_path}/privoxy-blocklist")
+    if config_file.exists():
+        config_file.unlink()
+    ret = shell.run(privoxy_blocklist, "-c", str(config_file))
     assert ret.returncode == 2
     assert "Creating default one and exiting" in ret.stdout
-    assert config.exists()
+    assert config_file.exists()
 
 
 def test_version_option(shell, tmp_path, privoxy_blocklist) -> None:
@@ -58,77 +61,59 @@ def test_next_run(shell, privoxy_blocklist) -> None:
 
 def test_request_success(start_privoxy, supported_schemes) -> None:
     """Test URLs not blocked by privoxy."""
-    # FIXME: see https://github.com/Andrwe/privoxy-blocklist/issues/35
-    urls = ["duckduckgo.com/", "hs-exp.jp/ads/"]
-    urls = ["duckduckgo.com/"]
-    run_requests(start_privoxy, supported_schemes, urls, [200, 301, 302])
+    run_requests(start_privoxy, supported_schemes, config.urls_allowed, [200, 301, 302])
 
 
 def test_request_block_url(start_privoxy, supported_schemes) -> None:
     """Test URLs blocked by privoxy due to easylist."""
-    urls = [
-        "andrwe.org/ads/",
-        "andrwe.jp/ads/",
-        "pubfeed.linkby.com",
-        f"s3.{'a'*6}.amazonaws.com/{'0123abcd'*6}/{'ab,12'*2}/",
-    ]
-    urls = ["andrwe.org/ads/", "andrwe.jp/ads/", "pubfeed.linkby.com"]
-    run_requests(start_privoxy, supported_schemes, urls, [403])
+    run_requests(start_privoxy, supported_schemes, config.urls_blocked, [403])
 
 
+def test_removed_content(start_privoxy, httpserver) -> None:
+    """Test filters for removing content."""
+    with Path(__file__).parent.joinpath("response.html").open(
+        "r", encoding="UTF-8"
+    ) as f_h:
+        response_html = f_h.read()
+    httpserver.expect_request("/").respond_with_data(
+        response_data=response_html, content_type="text/html"
+    )
+    parsed_url = parse_url(httpserver.url_for("/"))
+    parsed_port = f":{parsed_url.port}" if parsed_url.port else ""
+    scheme_less_url = f"{parsed_url.host}{parsed_port}{parsed_url.request_uri}"
+    response = run_request(
+        start_privoxy,
+        scheme=parsed_url.scheme or "http",
+        url=scheme_less_url,
+        expected_code=[200],
+    )
+    # expected response
+    assert check_in("just-some-test-string-always-present", response.text)
+    for needle in config.content_removed:
+        # check presence of needle without privoxy
+        assert check_in(needle, requests.get(httpserver.url_for("/"), timeout=10).text)
+        # check presence of needle with privoxy
+        assert check_not_in(needle, response.text)
+    for needle in config.content_exists:
+        # check presence of needle without privoxy
+        assert check_in(needle, requests.get(httpserver.url_for("/"), timeout=10).text)
+        # check presence of needle with privoxy
+        assert check_in(needle, response.text)
+
+
+# must be second last test as it will generate unpredictable privoxy configurations
 def test_predefined_custom_config_generator(shell, privoxy_blocklist) -> None:
     """Run tests for all pre-defined configs."""
-    checks = {
-        "url_extended_config.conf": [
-            (
-                check_in,
-                "Processing https://raw.githubusercontent.com/easylist/easylist/master/"
-                "easylist/easylist_allowlist_general_hide.txt",
-            ),
-            (
-                check_in,
-                "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt",
-            ),
-            (
-                check_in,
-                "The list recieved from https://raw.githubusercontent.com/easylist/easylist/master"
-                "/easylist/easylist_allowlist_general_hide.txt does not contain AdblockPlus list "
-                "header. Try to process anyway.",
-            ),
-            (
-                check_not_in,
-                "created and added image handler",
-            ),
-        ],
-        "debugging.conf": [
-            (
-                check_in,
-                "Processing https://easylist-downloads.adblockplus.org/easylistgermany.txt",
-            ),
-            (
-                check_not_in,
-                "does not contain AdblockPlus list header.",
-            ),
-            (
-                check_in,
-                "‘/tmp/privoxy-blocklist.sh/easylist.txt’ saved",
-            ),
-            (
-                check_in,
-                "created and added image handler",
-            ),
-        ],
-    }
     test_config_dir = Path(__file__).parent / "configs"
-    for config in test_config_dir.iterdir():
-        if not config.is_file():
+    for config_file in test_config_dir.iterdir():
+        if not config_file.is_file():
             continue
-        ret = shell.run(privoxy_blocklist, "-c", str(config))
+        ret = shell.run(privoxy_blocklist, "-c", str(config_file))
         assert ret.returncode == 0
         assert check_not_in("Creating default one and exiting", ret.stdout)
-        for check in checks.get(config.name, []):
+        for check in config.config_checks.get(config_file.name, []):
             assert check[0](check[1], ret.stdout)
-        assert config.exists()
+        assert config_file.exists()
 
 
 # must be last test as it will uninstall dependencies and check error handling
@@ -157,16 +142,6 @@ def test_privoxy_runtime_log() -> None:
 # Heloer functions
 
 
-def check_in(needle: str, haystack: str) -> bool:
-    """Check given haystack for given string."""
-    return needle in haystack
-
-
-def check_not_in(needle: str, haystack: str) -> bool:
-    """Check that given string is not in given text."""
-    return needle not in haystack
-
-
 def run_requests(
     start_privoxy, supported_schemes, urls: list[str], expected_code: list[int]
 ) -> None:

From db8e04e85f30463cc0417f18ac40046e558a308b Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Mon, 15 Jan 2024 23:47:38 +0100
Subject: [PATCH 04/11] add vim & curl for debugging in Docker container

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 tests/Dockerfile_ubuntu | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/Dockerfile_ubuntu b/tests/Dockerfile_ubuntu
index d5366d5..08b58c0 100644
--- a/tests/Dockerfile_ubuntu
+++ b/tests/Dockerfile_ubuntu
@@ -5,10 +5,12 @@ COPY		helper/install_deps.sh /install_deps.sh
 ENV		DEBIAN_FRONTEND=noninteractive
 RUN		apt-get update \
 		&& apt-get install --no-install-recommends -q --yes \
+			curl \
 			build-essential \
 			python3-pip \
 			python3-dev \
 			sudo \
+			vim \
 		&& pip install --no-cache-dir -qr /requirements.txt \
 		&& rm -f /requirements.txt \
 		&& install -d -o root -g root /pytest_cache \

From c6c994afe4d5c04cb86c90a2ddd663c922a897a9 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Mon, 15 Jan 2024 23:59:42 +0100
Subject: [PATCH 05/11] optimize id & class matching rules

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 .pre-commit-config.yaml |   6 ++
 privoxy-blocklist.sh    | 128 +++++++++++++++++++++++++++++-----------
 2 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f6db1e5..7f039ef 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,6 +22,12 @@ repos:
     rev: v3.7.0.1
     hooks:
       - id: shfmt
+        args:
+          - "--binary-next-line"
+          - "--case-indent"
+          - "--indent"
+          - "4"
+          - "--space-redirects"
   - repo: https://github.com/AleksaC/hadolint-py
     rev: v2.12.0.3
     hooks:
diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 992e139..7c8be59 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -267,39 +267,89 @@ function main() {
         ' "${address_file}" >> "${actionfile}"
 
         debug 1 "... creating filterfile for ${list} ..."
-        echo "FILTER: ${list} Tag filter of ${list}" > "${filterfile}"
+        echo "FILTER: ${list}_class_global Tag filter of ${list}" > "${filterfile}"
         debug 1 "... processing 'class'-matches ..."
-        sed '
-        # only process gloabl classes
-        /^##\..*/!d
-        # cleanup
-        s/^##//g
-        # convert classes independent of HTML tag
-        s/^\.\(.*\)/s@<([a-zA-Z0-9]+)\\s+.*class=.?\1.*>.*<\/\\1>@@g/g
-        # convert classes with defined HTML tag
-        s/^\([a-zA-Z0-9]*\)\.\(.*\)\[.*\]\[.*\]*/s@<\1.*class=.?\2.*>.*<\/\1>@@g/g
-        ' "${html_file}" >> "${filterfile}"
+        (
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl class matches
+                /^##\..*/!d
+                # remove all combinations with attribute matching
+                /^##\..*\[.*/d
+                # remove all matches with combinators
+                /^##\..*[>+~ ].*/d
+                # cleanup
+                s/^##\.//g
+                # prepare regex merging
+                s/$/|/
+            ' "${html_file}" | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
+                lines=()
+            done
+        ) >> "${filterfile}"
         # FIXME: add class handling with domains
+        # FIXME: add class handling with combinators
+        # FIXME: add class with defined HTML tag ?
+        # FIXME: add class with cascading
 
+        echo "FILTER: ${list}_id_global Tag filter of ${list}" >> "${filterfile}"
         debug 1 "... processing 'id'-matches ..."
-        sed '
-        # only process gloabl classes
-        /^###.*/!d
-        # cleanup
-        s/^##//g
-        # convert id independent of HTML tag
-        s/^#\(.*\)/s@<.*id=.?\1.*>.*<\/@@g/g
-        # convert id with defined HTML tag and extended selectors
-        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
-        # convert id with defined HTML tag
-        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
-        ' "${html_file}" >> "${filterfile}"
+        (
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl classes
+                /^###.*/!d
+                # remove all matches with combinators
+                /^###.*[>+~].*/d
+                # cleanup
+                s/^###//g
+                # prepare regex merging
+                s/$/|/
+            ' "${html_file}" | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
+                lines=()
+            done
+        ) >> "${filterfile}"
         # FIXME: add id handling with domains
+        # FIXME: add id handling with combinators
+        # FIXME: add id with defined HTML tag:
+        #        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
+        #        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
+        # FIXME: add id with cascading
 
+        echo "FILTER: ${list}_attribute Tag filter of ${list}" >> "${filterfile}"
         debug 1 "... processing 'attribute'-matches ..."
         sed '
         # only process gloabl classes
         /^##\[.*/!d
+        # remove all matches with combinators
+        /^##\[.*[>+~].*/d
         # cleanup
         s/^##//g
         # convert attribute based filters with exact match with exact match
@@ -316,10 +366,18 @@ function main() {
         s/\.\([a-zA-Z0-9]\)/\\.\1/g
         ' "${html_file}" >> "${filterfile}"
         # FIXME: add attribute handling with domains
+        # FIXME: add attribute handling with combinators
+        # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
 
         debug 1 "... filterfile created - adding filterfile to actionfile ..."
-        echo "{ +filter{${list}} }" >> "${actionfile}"
-        echo ".*" >> "${actionfile}"
+        (
+            echo "{ +filter{${list}_class_global} }"
+            echo "/"
+            echo "{ +filter{${list}_id_global} }"
+            echo "/"
+            echo "{ +filter{${list}_attribute} }"
+            echo "*"
+        ) >> "${actionfile}"
         debug 1 "... filterfile added ..."
 
         # create domain based whitelist
@@ -406,18 +464,18 @@ function lock() {
 
 # shellcheck disable=SC2317
 function remove() {
-            read -rp "Do you really want to remove all build lists?(y/N) " choice
-            if [ "${choice}" != "y" ]; then
-                exit 0
+    read -rp "Do you really want to remove all build lists?(y/N) " choice
+    if [ "${choice}" != "y" ]; then
+        exit 0
     fi
-            if rm -rf "${PRIVOXY_DIR}/"*.script.{action,filter} \
-                && sed '/^actionsfile .*\.script\.action$/d;/^filterfile .*\.script\.filter$/d' -i "${PRIVOXY_CONF}"; then
-                echo "Lists removed."
-                exit 0
+    if rm -rf "${PRIVOXY_DIR}/"*.script.{action,filter} \
+        && sed '/^actionsfile .*\.script\.action$/d;/^filterfile .*\.script\.filter$/d' -i "${PRIVOXY_CONF}"; then
+        echo "Lists removed."
+        exit 0
     fi
-            error "An error occured while removing the lists."
-            error "Please have a look into ${PRIVOXY_DIR} whether there are .script.* files and search for *.script.* in ${PRIVOXY_CONF}."
-            exit 1
+    error "An error occured while removing the lists."
+    error "Please have a look into ${PRIVOXY_DIR} whether there are .script.* files and search for *.script.* in ${PRIVOXY_CONF}."
+    exit 1
 }
 
 VERBOSE=()

From 519998cb07f4a61dc4dc48c7df258a825c2798fd Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Wed, 17 Jan 2024 00:32:14 +0100
Subject: [PATCH 06/11] move testwebserver to conftest.py

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 .ci_config/prospector.yaml    |  7 +++++++
 tests/conftest.py             | 33 +++++++++++++++++++++++++++++++++
 tests/test_01_root_execute.py | 33 +++++++++++++++++----------------
 3 files changed, 57 insertions(+), 16 deletions(-)

diff --git a/.ci_config/prospector.yaml b/.ci_config/prospector.yaml
index ad9e5ef..8d31dcf 100644
--- a/.ci_config/prospector.yaml
+++ b/.ci_config/prospector.yaml
@@ -13,3 +13,10 @@ bandit:
 
 mypy:
   run: true
+
+pydocstyle:
+  disable:
+    # conflicts with D211
+    - D203
+    # conflicts with D211
+    - D212
diff --git a/tests/conftest.py b/tests/conftest.py
index be9238d..2325574 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -10,10 +10,30 @@
 import requests
 from pytest import StashKey
 from pytestshellutils.shell import Daemon
+from urllib3.util import Url, parse_url
 
 phase_report_key = StashKey[int]()
 
 
+class UrlParsed:
+    """Class to parse and store URL."""
+
+    origin_url: str
+    parsed_url: Url
+    scheme: str
+    scheme_less_url: str
+
+    def __init__(self, url: str):
+        """Initialize object by parsing given URL."""
+        self.origin_url = url
+        self.parsed_url = parse_url(self.origin_url)
+        self.scheme = self.parsed_url.scheme or "http"
+        parsed_port = f":{self.parsed_url.port}" if self.parsed_url.port else ""
+        self.scheme_less_url = (
+            f"{self.parsed_url.host}{parsed_port}{self.parsed_url.request_uri}"
+        )
+
+
 def debug_enabled() -> bool:
     """Check if debugging is enabled."""
     # RUNNER_DEBUG = set when "debug logging" activated
@@ -55,6 +75,19 @@ def pytest_runtest_makereport(item: pytest.Item):
     return report
 
 
+@pytest.fixture
+def webserver(httpserver) -> UrlParsed:
+    """Start HTTP server and return parsed URL object."""
+    with Path(__file__).parent.joinpath("response.html").open(
+        "r", encoding="UTF-8"
+    ) as f_h:
+        response_html = f_h.read()
+    httpserver.expect_request("/").respond_with_data(
+        response_data=response_html, content_type="text/html"
+    )
+    return UrlParsed(httpserver.url_for("/"))
+
+
 @pytest.fixture(scope="module")
 def privoxy_blocklist() -> str:
     """Return the path to privoxy-blocklist.sh."""
diff --git a/tests/test_01_root_execute.py b/tests/test_01_root_execute.py
index b0dbffa..16b1b51 100644
--- a/tests/test_01_root_execute.py
+++ b/tests/test_01_root_execute.py
@@ -7,7 +7,6 @@
 import config
 import requests
 from conftest import check_in, check_not_in
-from urllib3.util import parse_url
 
 
 def test_config_generator(shell, privoxy_blocklist) -> None:
@@ -69,34 +68,36 @@ def test_request_block_url(start_privoxy, supported_schemes) -> None:
     run_requests(start_privoxy, supported_schemes, config.urls_blocked, [403])
 
 
-def test_removed_content(start_privoxy, httpserver) -> None:
+def test_content_removed(start_privoxy, webserver) -> None:
     """Test filters for removing content."""
-    with Path(__file__).parent.joinpath("response.html").open(
-        "r", encoding="UTF-8"
-    ) as f_h:
-        response_html = f_h.read()
-    httpserver.expect_request("/").respond_with_data(
-        response_data=response_html, content_type="text/html"
-    )
-    parsed_url = parse_url(httpserver.url_for("/"))
-    parsed_port = f":{parsed_url.port}" if parsed_url.port else ""
-    scheme_less_url = f"{parsed_url.host}{parsed_port}{parsed_url.request_uri}"
     response = run_request(
         start_privoxy,
-        scheme=parsed_url.scheme or "http",
-        url=scheme_less_url,
+        scheme=webserver.scheme,
+        url=webserver.scheme_less_url,
         expected_code=[200],
     )
     # expected response
     assert check_in("just-some-test-string-always-present", response.text)
     for needle in config.content_removed:
         # check presence of needle without privoxy
-        assert check_in(needle, requests.get(httpserver.url_for("/"), timeout=10).text)
+        assert check_in(needle, requests.get(webserver.origin_url, timeout=10).text)
         # check presence of needle with privoxy
         assert check_not_in(needle, response.text)
+
+
+def test_content_exists(start_privoxy, webserver) -> None:
+    """Test that content not matched by any filter is kept."""
+    response = run_request(
+        start_privoxy,
+        scheme=webserver.scheme,
+        url=webserver.scheme_less_url,
+        expected_code=[200],
+    )
+    # expected response
+    assert check_in("just-some-test-string-always-present", response.text)
     for needle in config.content_exists:
         # check presence of needle without privoxy
-        assert check_in(needle, requests.get(httpserver.url_for("/"), timeout=10).text)
+        assert check_in(needle, requests.get(webserver.origin_url, timeout=10).text)
         # check presence of needle with privoxy
         assert check_in(needle, response.text)
 

From bd759be6cd06579181dcf7872c9936a6f5b740fc Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Wed, 17 Jan 2024 02:16:35 +0100
Subject: [PATCH 07/11] add attribute-name-only handler & fix left-over
 handling

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 privoxy-blocklist.sh | 155 ++++++++++++++++++++++++++++++++-----------
 tests/config.py      |   1 +
 tests/response.html  |   1 +
 3 files changed, 117 insertions(+), 40 deletions(-)

diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 7c8be59..b42579a 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -204,6 +204,7 @@ function main() {
         regex_file="${file}.regex"
         regex_except_file="${file}.regex_except"
         html_file="${file}.html"
+        html_except_file="${file}.html_except"
         actionfile=${file%\.*}.script.action
         filterfile=${file%\.*}.script.filter
         list="$(basename "${file%\.*}")"
@@ -234,7 +235,8 @@ function main() {
         grep '^/^' "${file}" > "${regex_file}"
         grep '^@@/^' "${file}" > "${regex_except_file}"
         ## html element block
-        grep '^.*##..*' "${file}" > "${html_file}"
+        grep -E '^.*##.+' "${file}" > "${html_file}"
+        grep -E '^.*#@#.+' "${file}" > "${html_except_file}"
         set -e
 
         # convert AdblockPlus list to Privoxy list
@@ -267,9 +269,11 @@ function main() {
         ' "${address_file}" >> "${actionfile}"
 
         debug 1 "... creating filterfile for ${list} ..."
+        debug 1 "... processing global 'class'-matches ..."
         echo "FILTER: ${list}_class_global Tag filter of ${list}" > "${filterfile}"
-        debug 1 "... processing 'class'-matches ..."
         (
+            # allow handling of left-over lines from last while-loop-run
+            shopt -s lastpipe
             lines=()
             # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
             sed -e '
@@ -299,22 +303,38 @@ function main() {
                 printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
                 lines=()
             done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
+            fi
+            shopt -u lastpipe
         ) >> "${filterfile}"
+
+        debug 1 "... registering ${list}_class_global in actionfile ..."
+        (
+            echo "{ +filter{${list}_class_global} }"
+            echo "/"
+        ) >> "${actionfile}"
+        debug 1 "... registered ..."
         # FIXME: add class handling with domains
         # FIXME: add class handling with combinators
         # FIXME: add class with defined HTML tag ?
         # FIXME: add class with cascading
 
+        debug 1 "... processing global 'id'-matches ..."
         echo "FILTER: ${list}_id_global Tag filter of ${list}" >> "${filterfile}"
-        debug 1 "... processing 'id'-matches ..."
         (
+            # allow handling of left-over lines from last while-loop-run
+            shopt -s lastpipe
             lines=()
             # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
             sed -e '
-                # only process gloabl classes
+                # only process gloabl id-only matches
                 /^###.*/!d
                 # remove all matches with combinators
-                /^###.*[>+~].*/d
+                /^###.*[>+~ ].*/d
                 # cleanup
                 s/^###//g
                 # prepare regex merging
@@ -335,50 +355,105 @@ function main() {
                 printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
                 lines=()
             done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
+            fi
+            shopt -u lastpipe
         ) >> "${filterfile}"
+
+        debug 1 "... registering ${list}_id_global in actionfile ..."
+        (
+            echo "{ +filter{${list}_id_global} }"
+            echo "/"
+        ) >> "${actionfile}"
+        debug 1 "... registered ..."
         # FIXME: add id handling with domains
         # FIXME: add id handling with combinators
-        # FIXME: add id with defined HTML tag:
-        #        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\):.*[\:[^:]]*[^:]*/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
-        #        s/^\([a-zA-Z0-9][a-zA-Z0-9]*\)#\(.*\)/s@<\1.*id=.?\2.*>.*<\/\1>@@g/g
         # FIXME: add id with cascading
 
-        echo "FILTER: ${list}_attribute Tag filter of ${list}" >> "${filterfile}"
-        debug 1 "... processing 'attribute'-matches ..."
-        sed '
-        # only process gloabl classes
-        /^##\[.*/!d
-        # remove all matches with combinators
-        /^##\[.*[>+~].*/d
-        # cleanup
-        s/^##//g
-        # convert attribute based filters with exact match with exact match
-        s/^\[\([^=^]*\)"*=\(.*\)\]/s@\1=\2>@@g/g
-        # convert attribute based filter with contain match
-        s/^\[\([^=^]*\)"*\*="*\([^"]*\)"*\]/s@\1=".*\2.*">@@g/g
-        # convert attribute based filter with startwith match
-        s/^\[\([^=]*\)"*^="*\([^"]*\)"*\]/s@\1="\2.*">@@g/g
-        # convert attribute based filter with endswith match
-        s/^\[\([^=^]*\)"*\$="*\([^"]*\)"*\]/s@\1=".*\2">@@g/g
-        # convert attribute name-only matches
-        s/^\[\(.*\)"*\]/s@<.*\1.*\/>@@g\ns@<\([^ ]*\) .*\1.*>.*<\/\\1.*>@@g/g
-        # convert dots
-        s/\.\([a-zA-Z0-9]\)/\\.\1/g
-        ' "${html_file}" >> "${filterfile}"
-        # FIXME: add attribute handling with domains
-        # FIXME: add attribute handling with combinators
-        # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
+        debug 1 "... processing 'attribute'-matches with name only and no HTML tag ..."
+        echo "FILTER: ${list}_attribute_global_name_only Tag filter of ${list}" >> "${filterfile}"
+        (
+            # allow handling of left-over lines from last while-loop-run
+            shopt -s lastpipe
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process global attribute matches without a value (name only)
+                /^##\[[^=][^=]*$/!d
+                # remove all matches with combinators
+                /^##.*[>+~ ].*/d
+                # cleanup
+                s/^##//g
+                # convert attribute name-only matches
+                s/^\[\([^=][^=]*\)\]/\1/g
+                # convert dots
+                s/\.\([^\.]\)/\\.\1/g
+                s/$/|/
+            ' "${html_file}" | sort -u | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # open the substitution: any HTML tag, then the start of the alternation group
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # close the alternation group and match through the closing HTML tag
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+                lines=()
+            done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+            fi
+            shopt -u lastpipe
+        ) >> "${filterfile}"
 
-        debug 1 "... filterfile created - adding filterfile to actionfile ..."
+        debug 1 "... registering ${list}_attribute_global_name_only in actionfile ..."
         (
-            echo "{ +filter{${list}_class_global} }"
-            echo "/"
-            echo "{ +filter{${list}_id_global} }"
+            echo "{ +filter{${list}_attribute_global_name_only} }"
             echo "/"
-            echo "{ +filter{${list}_attribute} }"
-            echo "*"
         ) >> "${actionfile}"
-        debug 1 "... filterfile added ..."
+        debug 1 "... registered ..."
+
+        #debug 1 "... processing 'attribute'-matches ..."
+        #sed '
+        ## only process gloabl classes
+        #/^##\[.*/!d
+        ## remove all matches with combinators
+        #/^##\[.*[>+~].*/d
+        ## cleanup
+        #s/^##//g
+        ## convert attribute based filters with exact match with
+        #s/^\[\([^=^]*\)"*=\(.*\)\]/s@\1=\2>@@g/g
+        ## convert attribute based filter with contain match
+        #s/^\[\([^=^]*\)"*\*="*\([^"]*\)"*\]/s@\1=".*\2.*">@@g/g
+        ## convert attribute based filter with startwith match
+        #s/^\[\([^=]*\)"*^="*\([^"]*\)"*\]/s@\1="\2.*">@@g/g
+        ## convert attribute based filter with endswith match
+        #s/^\[\([^=^]*\)"*\$="*\([^"]*\)"*\]/s@\1=".*\2">@@g/g
+        ## convert dots
+        #s/\.\([a-zA-Z0-9]\)/\\.\1/g
+        #' "${html_file}" >> "${filterfile}"
+
+        #debug 1 "... registering ${list}_attribute in actionfile ..."
+        #(
+        #    echo "{ +filter{${list}_attribute} }"
+        #    echo "*"
+        #) >> "${actionfile}"
+        #debug 1 "... registered ..."
+        # FIXME: add attribute handling with domains
+        # FIXME: add attribute handling with combinators
+        # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
 
         # create domain based whitelist
 
diff --git a/tests/config.py b/tests/config.py
index 83f1bad..5f3bb12 100644
--- a/tests/config.py
+++ b/tests/config.py
@@ -6,6 +6,7 @@
     "ad_970x250",  # class match: https://www.iphoneitalia.com/
     "MyAdsId3",  # id match
     "AdRight2",  # class match with element having multiple classes
+    "data-ad-manager-id",  # attribute match
 ]
 content_exists = [
     "ajlkl",  # should exist, although one element is removed by privoxy
diff --git a/tests/response.html b/tests/response.html
index 196a7b7..28ceb21 100644
--- a/tests/response.html
+++ b/tests/response.html
@@ -5,5 +5,6 @@
 		<div class="asd ajlkl AdRight2">multiple classes that should be removed</div>
 		<div class="asd ajlkl"> multiple classes that should exist </div>
 		<div id="MyAdsId3">id should be removed</div>
+		<div data-ad-manager-id class="success-valid-class">name-only attibute should be removed</div>
 	</body>
 </html>

From 25f7dfb987b5d0e4a067d6c0f589b048460b0a57 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Sat, 27 Jan 2024 15:13:46 +0100
Subject: [PATCH 08/11] add attribute matches for exact, startswith & endswith

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 privoxy-blocklist.sh | 194 ++++++++++++++++++++++++++++++++++++-------
 tests/config.py      |   5 ++
 tests/response.html  |   6 ++
 3 files changed, 175 insertions(+), 30 deletions(-)

diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index b42579a..872be2d 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -374,11 +374,11 @@ function main() {
         # FIXME: add id handling with combinators
         # FIXME: add id with cascading
 
-        debug 1 "... processing 'attribute'-matches with name only and no HTML tag ..."
-        echo "FILTER: ${list}_attribute_global_name_only Tag filter of ${list}" >> "${filterfile}"
+        debug 1 "... processing 'attribute'-matches with no HTML tag ..."
         (
-            # allow handling of left-over lines from last while-loop-run
             shopt -s lastpipe
+            # allow handling of left-over lines from last while-loop-run
+            echo "FILTER: ${list}_attribute_global_name_only Tag filter of ${list}"
             lines=()
             # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
             sed -e '
@@ -415,42 +415,176 @@ function main() {
                 printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
                 printf ').*>.*<\/\\1[^>]*>@@g\n'
             fi
+
+            echo "FILTER: ${list}_attribute_exact Tag filter of ${list}"
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl classes
+                /^##\[[^=^*][^=^*]*=.*$/!d
+                # remove all matches with combinators
+                /^##.*[>+~ ].*/d
+                # cleanup
+                s/^##//g
+                # convert attribute name-only matches
+                s/^\[\([^=][^=]*\)=\(.*\)\]/\1=\2/g
+                # convert dots
+                s/\.\([^\.]\)/\\.\1/g
+                s/$/|/
+            ' "${html_file}" | sort -u | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+                lines=()
+            done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+            fi
+
+            echo "FILTER: ${list}_attribute_contain Tag filter of ${list}"
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl classes
+                /^##\[[^*][^*]*\*=.*$/!d
+                # remove all matches with combinators
+                /^##.*[>+~ ].*/d
+                # cleanup
+                s/^##//g
+                # convert dots
+                s/\.\([^\.]\)/\\.\1/g
+                # convert attribute based filter with contain match
+                s/^\[\([^*][^*]*\)\*=\(["'"'"']*\)\([^"][^"]*\)"*\(["'"'"']*\)\]/\1=\2.*\3.*\4/g
+                s/$/|/
+            ' "${html_file}" | sort -u | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+                lines=()
+            done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+            fi
+
+            echo "FILTER: ${list}_attribute_startswith Tag filter of ${list}"
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl classes
+                /^##\[[^=^][^=^]*\^=.*$/!d
+                # remove all matches with combinators
+                /^##.*[>+~ ].*/d
+                # cleanup
+                s/^##//g
+                # convert dots
+                s/\.\([^\.]\)/\\.\1/g
+                # convert attribute based filter with startwith match
+                s/^\[\([^^][^^]*\)^=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2\3.*\4/g
+                s/$/|/
+            ' "${html_file}" | sort -u | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+                lines=()
+            done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+            fi
+
+            echo "FILTER: ${list}_attribute_endswith Tag filter of ${list}"
+            lines=()
+            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+            sed -e '
+                # only process gloabl classes
+                /^##\[[^$][^=$]*\$=.*$/!d
+                # remove all matches with combinators
+                /^##.*[>+~ ].*/d
+                # cleanup
+                s/^##//g
+                # convert dots
+                s/\.\([^\.]\)/\\.\1/g
+                # convert attribute based filter with endswith match
+                s/^\[\([^\$][^\$]*\)\$=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2.*\3\4/g
+                s/$/|/
+            ' "${html_file}" | sort -u | while read -r line; do
+                # number of matches within one rule impacts runtime of each request to modify the content
+                if [ "${#lines[@]}" -lt 1000 ]; then
+                    lines+=("$line")
+                    continue
+                fi
+                # complexity of regex impacts runtime of each request to modify the content
+                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                # printf to inject both quoting characters " and '
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                # printf to inject both quoting characters " and '
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+                lines=()
+            done
+            # process last chunk with less than 1000 entries
+            if [ "${#lines[@]}" -gt 0 ]; then
+                printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                printf ').*>.*<\/\\1[^>]*>@@g\n'
+            fi
             shopt -u lastpipe
         ) >> "${filterfile}"
 
-        debug 1 "... registering ${list}_attribute_global_name_only in actionfile ..."
+        debug 1 "... registering ${list}_attribute filters in actionfile ..."
         (
             echo "{ +filter{${list}_attribute_global_name_only} }"
             echo "/"
+            echo "{ +filter{${list}_attribute_exact} }"
+            echo "/"
+            echo "{ +filter{${list}_attribute_contain} }"
+            echo "/"
+            echo "{ +filter{${list}_attribute_startswith} }"
+            echo "/"
+            echo "{ +filter{${list}_attribute_endswith} }"
+            echo "/"
         ) >> "${actionfile}"
         debug 1 "... registered ..."
 
-        #debug 1 "... processing 'attribute'-matches ..."
-        #sed '
-        ## only process gloabl classes
-        #/^##\[.*/!d
-        ## remove all matches with combinators
-        #/^##\[.*[>+~].*/d
-        ## cleanup
-        #s/^##//g
-        ## convert attribute based filters with exact match with
-        #s/^\[\([^=^]*\)"*=\(.*\)\]/s@\1=\2>@@g/g
-        ## convert attribute based filter with contain match
-        #s/^\[\([^=^]*\)"*\*="*\([^"]*\)"*\]/s@\1=".*\2.*">@@g/g
-        ## convert attribute based filter with startwith match
-        #s/^\[\([^=]*\)"*^="*\([^"]*\)"*\]/s@\1="\2.*">@@g/g
-        ## convert attribute based filter with endswith match
-        #s/^\[\([^=^]*\)"*\$="*\([^"]*\)"*\]/s@\1=".*\2">@@g/g
-        ## convert dots
-        #s/\.\([a-zA-Z0-9]\)/\\.\1/g
-        #' "${html_file}" >> "${filterfile}"
-
-        #debug 1 "... registering ${list}_attribute in actionfile ..."
-        #(
-        #    echo "{ +filter{${list}_attribute} }"
-        #    echo "*"
-        #) >> "${actionfile}"
-        #debug 1 "... registered ..."
         # FIXME: add attribute handling with domains
         # FIXME: add attribute handling with combinators
         # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
diff --git a/tests/config.py b/tests/config.py
index 5f3bb12..df40ece 100644
--- a/tests/config.py
+++ b/tests/config.py
@@ -7,9 +7,14 @@
     "MyAdsId3",  # id match
     "AdRight2",  # class match with element having multiple classes
     "data-ad-manager-id",  # attribute match
+    'data-role="tile-ads-module"',  # attribute exact match
+    'onclick="content.ad/"',  # attribute contain match
+    'class="adDisplay-module_foobar"',  # attribute startswith match
+    "onclick=\"location.href='http://www.reimageplus.com/foobar'",  # attribute startswith match
 ]
 content_exists = [
     "ajlkl",  # should exist, although one element is removed by privoxy
+    '"adDisplay-modul"',  # should exist
 ]
 
 # FIXME: see https://github.com/Andrwe/privoxy-blocklist/issues/35
diff --git a/tests/response.html b/tests/response.html
index 28ceb21..8882c68 100644
--- a/tests/response.html
+++ b/tests/response.html
@@ -6,5 +6,11 @@
 		<div class="asd ajlkl"> multiple classes that should exist </div>
 		<div id="MyAdsId3">id should be removed</div>
 		<div data-ad-manager-id class="success-valid-class">name-only attibute should be removed</div>
+		<div data-role="tile-ads-module" class="success-valid-class">exact match attibute should be removed</div>
+		<div onclick="foo_content.ad/-asd">1. contain match attribute should be removed</div>
+		<div onclick="content.ad/">2. contain match attribute should be removed</div>
+		<div class="adDisplay-module_foobar">1. startswith match attribute should be removed</div>
+		<div class="adDisplay-modul">startswith match attribute should be exist</div>
+		<div onclick="location.href='http://www.reimageplus.com/foobar'">2. startswith match attribute should be removed</div>
 	</body>
 </html>

From 321cfdd1c1715c91da441a0b0d99a408d36bed16 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Sun, 28 Jan 2024 22:42:42 +0100
Subject: [PATCH 09/11] implement flag for content filter activation

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 privoxy-blocklist.sh          | 713 +++++++++++++++++++---------------
 tests/conftest.py             |  20 +
 tests/test_01_root_execute.py |  16 +-
 3 files changed, 426 insertions(+), 323 deletions(-)

diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 872be2d..51d251c 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -30,6 +30,18 @@ set -euo pipefail
 # dependencies
 DEPENDS=('privoxy' 'sed' 'grep' 'bash' 'wget')
 
+# types of content filters
+#   used in conftest.py, thus keep structure
+FILTERTYPES=(
+    "attribute_global_name"
+    "attribute_global_exact"
+    "attribute_global_contain"
+    "attribute_global_startswith"
+    "attribute_global_endswith"
+    "class_global"
+    "id_global"
+)
+
 ######################################################################
 #
 #                  No changes needed after this line.
@@ -41,13 +53,16 @@ function usage() {
     echo "${TMPNAME:-This} is a script to convert AdBlockPlus-lists into Privoxy-lists and install them."
     echo " "
     echo "Options:"
-    echo "      -h:    Show this help."
-    echo "      -c:    Path to script configuration file. (default = ${SCRIPTCONF} - OS specific)"
-    echo "      -q:    Don't give any output."
-    echo "      -v 1:  Enable verbosity 1. Show a little bit more output."
-    echo "      -v 2:  Enable verbosity 2. Show a lot more output."
-    echo "      -v 3:  Enable verbosity 3. Show all possible output and don't delete temporary files.(For debugging only!!)"
-    echo "      -r:    Remove all lists build by this script."
+    echo "      -h:         Show this help."
+    echo "      -c:         Path to script configuration file. (default = ${SCRIPTCONF} - OS specific)"
+    echo "      -f filter:  only activate given content filter, can be used multiple times. (default: empty, content-filter disabled)"
+    echo "                  Supported values: ${FILTERTYPES[*]}"
+    echo "      -q:         Don't give any output."
+    echo "      -v 1:       Enable verbosity 1. Show a little bit more output."
+    echo "      -v 2:       Enable verbosity 2. Show a lot more output."
+    echo "      -v 3:       Enable verbosity 3. Show all possible output and don't delete temporary files.(For debugging only!!)"
+    echo "      -V:         Show version."
+    echo "      -r:         Remove all lists build by this script."
 }
 
 function get_config_path() {
@@ -98,7 +113,15 @@ function prepare() {
 
 # array of URL for AdblockPlus lists
 #  for more sources just add it within the round brackets
-URLS=("https://easylist-downloads.adblockplus.org/easylistgermany.txt" "https://easylist-downloads.adblockplus.org/easylist.txt")
+URLS=(
+  "https://easylist-downloads.adblockplus.org/easylistgermany.txt"
+  "https://easylist-downloads.adblockplus.org/easylist.txt"
+)
+
+# array of content filters to convert
+#   for supported values check: $0 -h
+#   empty by default to deactivate as content filters slowdown privoxy a lot
+FILTERS=()
 
 # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP
 INIT_CONF="/etc/conf.d/privoxy"
@@ -134,6 +157,9 @@ EOF
     if [ -n "${OPT_DBG:-}" ]; then
         DBG="${OPT_DBG}"
     fi
+    if [ -n "${OPT_FILTERS[*]}" ]; then
+        FILTERS=("${OPT_FILTERS[@]}")
+    fi
     # load privoxy config
     # shellcheck disable=SC1090
     if [[ -r "${INIT_CONF:-no-init-conf}" ]]; then
@@ -190,6 +216,11 @@ function info() {
     printf '\e[1;33m%s\e[0m\n' "$@"
 }
 
+# shellcheck disable=SC2317  # function is called in case of FILTERS not empty
+function filter_active() {
+    grep -qxF "$1" <(printf '%s\n' "${FILTERS[@]}")
+}
+
 # shellcheck disable=SC2317
 function main() {
     for url in "${URLS[@]}"; do
@@ -268,326 +299,354 @@ function main() {
         s/^|\([^|][^|]*\)|/^\1\$/g;s/|$/\$/g
         ' "${address_file}" >> "${actionfile}"
 
-        debug 1 "... creating filterfile for ${list} ..."
-        debug 1 "... processing global 'class'-matches ..."
-        echo "FILTER: ${list}_class_global Tag filter of ${list}" > "${filterfile}"
-        (
-            # allow handling of left-over lines from last while-loop-run
-            shopt -s lastpipe
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl class matches
-                /^##\..*/!d
-                # remove all combinations with attribute matching
-                /^##\..*\[.*/d
-                # remove all matches with combinators
-                /^##\..*[>+~ ].*/d
-                # cleanup
-                s/^##\.//g
-                # prepare regex merging
-                s/$/|/
-            ' "${html_file}" | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
-                fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
+        echo > "${filterfile}"
+        if [ -n "${FILTERS[*]}" ]; then
+            debug 1 "... creating filterfile for ${list} ..."
+            if filter_active "class_global"; then
+                debug 1 "... processing global 'class'-matches ..."
+                (
+                    # allow handling of left-over lines from last while-loop-run
+                    shopt -s lastpipe
+                    echo "FILTER: ${list}_class_global Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global class matches
+                        /^##\..*/!d
+                        # remove all combinations with attribute matching
+                        /^##\..*\[.*/d
+                        # remove all matches with combinators
+                        /^##\..*[>+~ ].*/d
+                        # cleanup
+                        s/^##\.//g
+                        # prepare regex merging
+                        s/$/|/
+                    ' "${html_file}" | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
+                        # using tr to merge lines because a sed-based approach takes up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
+                        lines=()
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*class=[%s][^%s]*(' "\"'" "\"'"
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ')[^%s]*[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'" "\"'"
+                    fi
+                    shopt -u lastpipe
+                ) >> "${filterfile}"
+
+                debug 1 "... registering ${list}_class_global in actionfile ..."
+                (
+                    echo "{ +filter{${list}_class_global} }"
+                    echo "/"
+                ) >> "${actionfile}"
+                debug 1 "... registered ..."
+                # FIXME: add class handling with domains
+                # FIXME: add class handling with combinators
+                # FIXME: add class with defined HTML tag ?
+                # FIXME: add class with cascading
             fi
-            shopt -u lastpipe
-        ) >> "${filterfile}"
-
-        debug 1 "... registering ${list}_class_global in actionfile ..."
-        (
-            echo "{ +filter{${list}_class_global} }"
-            echo "/"
-        ) >> "${actionfile}"
-        debug 1 "... registered ..."
-        # FIXME: add class handling with domains
-        # FIXME: add class handling with combinators
-        # FIXME: add class with defined HTML tag ?
-        # FIXME: add class with cascading
-
-        debug 1 "... processing global 'id'-matches ..."
-        echo "FILTER: ${list}_id_global Tag filter of ${list}" >> "${filterfile}"
-        (
-            # allow handling of left-over lines from last while-loop-run
-            shopt -s lastpipe
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl id-only matches
-                /^###.*/!d
-                # remove all matches with combinators
-                /^###.*[>+~ ].*/d
-                # cleanup
-                s/^###//g
-                # prepare regex merging
-                s/$/|/
-            ' "${html_file}" | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
-                fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
+
+            if filter_active "id_global"; then
+                debug 1 "... processing global 'id'-matches ..."
+                echo "FILTER: ${list}_id_global Tag filter of ${list}" >> "${filterfile}"
+                (
+                    # allow handling of left-over lines from last while-loop-run
+                    shopt -s lastpipe
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global id-only matches
+                        /^###.*/!d
+                        # remove all matches with combinators
+                        /^###.*[>+~ ].*/d
+                        # cleanup
+                        s/^###//g
+                        # prepare regex merging
+                        s/$/|/
+                    ' "${html_file}" | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
+                        # using tr to merge lines because a sed-based approach takes up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
+                        lines=()
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*id=[%s](' "\"'"
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ')[%s].*>.*<\/\\1[^>]*>@@g\n' "\"'"
+                    fi
+                    shopt -u lastpipe
+                ) >> "${filterfile}"
+
+                debug 1 "... registering ${list}_id_global in actionfile ..."
+                (
+                    echo "{ +filter{${list}_id_global} }"
+                    echo "/"
+                ) >> "${actionfile}"
+                debug 1 "... registered ..."
+                # FIXME: add id handling with domains
+                # FIXME: add id handling with combinators
+                # FIXME: add id with cascading
             fi
-            shopt -u lastpipe
-        ) >> "${filterfile}"
-
-        debug 1 "... registering ${list}_id_global in actionfile ..."
-        (
-            echo "{ +filter{${list}_id_global} }"
-            echo "/"
-        ) >> "${actionfile}"
-        debug 1 "... registered ..."
-        # FIXME: add id handling with domains
-        # FIXME: add id handling with combinators
-        # FIXME: add id with cascading
-
-        debug 1 "... processing 'attribute'-matches with no HTML tag ..."
-        (
-            shopt -s lastpipe
-            # allow handling of left-over lines from last while-loop-run
-            echo "FILTER: ${list}_attribute_global_name_only Tag filter of ${list}"
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl classes
-                /^##\[[^=][^=]*$/!d
-                # remove all matches with combinators
-                /^##.*[>+~ ].*/d
-                # cleanup
-                s/^##//g
-                # convert attribute name-only matches
-                s/^\[\([^=][^=]*\)\]/\1/g
-                # convert dots
-                s/\.\([^\.]\)/\\.\1/g
-                s/$/|/
-            ' "${html_file}" | sort -u | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
+
+            debug 1 "... processing 'attribute'-matches with no HTML tag ..."
+            (
+                shopt -s lastpipe
+
+                if filter_active "attribute_global_name"; then
+                    # relies on lastpipe set above: left-over lines collected by the while-loop stay available after it ends
+                    echo "FILTER: ${list}_attribute_global_name Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global attribute matches with name only (no operator/value)
+                        /^##\[[^=][^=]*$/!d
+                        # remove all matches with combinators
+                        /^##.*[>+~ ].*/d
+                        # cleanup
+                        s/^##//g
+                        # convert attribute name-only matches
+                        s/^\[\([^=][^=]*\)\]/\1/g
+                        # convert dots
+                        s/\.\([^\.]\)/\\.\1/g
+                        s/$/|/
+                    ' "${html_file}" | sort -u | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                        lines=()
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                    fi
                 fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-            fi
 
-            echo "FILTER: ${list}_attribute_exact Tag filter of ${list}"
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl classes
-                /^##\[[^=^*][^=^*]*=.*$/!d
-                # remove all matches with combinators
-                /^##.*[>+~ ].*/d
-                # cleanup
-                s/^##//g
-                # convert attribute name-only matches
-                s/^\[\([^=][^=]*\)=\(.*\)\]/\1=\2/g
-                # convert dots
-                s/\.\([^\.]\)/\\.\1/g
-                s/$/|/
-            ' "${html_file}" | sort -u | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
+                if filter_active "attribute_global_exact"; then
+                    echo "FILTER: ${list}_attribute_global_exact Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global attribute selectors
+                        /^##\[[^=^*][^=^*]*=.*$/!d
+                        # remove all matches with combinators
+                        /^##.*[>+~ ].*/d
+                        # cleanup
+                        s/^##//g
+                        # convert attribute name-only matches
+                        s/^\[\([^=][^=]*\)=\(.*\)\]/\1=\2/g
+                        # convert dots
+                        s/\.\([^\.]\)/\\.\1/g
+                        s/$/|/
+                    ' "${html_file}" | sort -u | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        # using tr to merge lines because sed-based approaches take up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                        lines=("$line") # the entry that triggered the flush was not emitted yet, it starts the next chunk
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                    fi
                 fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-            fi
 
-            echo "FILTER: ${list}_attribute_contain Tag filter of ${list}"
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl classes
-                /^##\[[^*][^*]*\*=.*$/!d
-                # remove all matches with combinators
-                /^##.*[>+~ ].*/d
-                # cleanup
-                s/^##//g
-                # convert dots
-                s/\.\([^\.]\)/\\.\1/g
-                # convert attribute based filter with contain match
-                s/^\[\([^*][^*]*\)\*=\(["'"'"']*\)\([^"][^"]*\)"*\(["'"'"']*\)\]/\1=\2.*\3.*\4/g
-                s/$/|/
-            ' "${html_file}" | sort -u | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
+                if filter_active "attribute_global_contain"; then
+                    echo "FILTER: ${list}_attribute_global_contain Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global attribute selectors
+                        /^##\[[^*][^*]*\*=.*$/!d
+                        # remove all matches with combinators
+                        /^##.*[>+~ ].*/d
+                        # cleanup
+                        s/^##//g
+                        # convert dots
+                        s/\.\([^\.]\)/\\.\1/g
+                        # convert attribute based filter with contain match
+                        s/^\[\([^*][^*]*\)\*=\(["'"'"']*\)\([^"][^"]*\)"*\(["'"'"']*\)\]/\1=\2.*\3.*\4/g
+                        s/$/|/
+                    ' "${html_file}" | sort -u | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        # using tr to merge lines because sed-based approaches take up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                        lines=("$line") # the entry that triggered the flush was not emitted yet, it starts the next chunk
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                    fi
                 fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-            fi
 
-            echo "FILTER: ${list}_attribute_startswith Tag filter of ${list}"
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl classes
-                /^##\[[^=^][^=^]*\^=.*$/!d
-                # remove all matches with combinators
-                /^##.*[>+~ ].*/d
-                # cleanup
-                s/^##//g
-                # convert dots
-                s/\.\([^\.]\)/\\.\1/g
-                # convert attribute based filter with startwith match
-                s/^\[\([^^][^^]*\)^=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2\3.*\4/g
-                s/$/|/
-            ' "${html_file}" | sort -u | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
+                if filter_active "attribute_global_startswith"; then
+                    echo "FILTER: ${list}_attribute_global_startswith Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global attribute selectors
+                        /^##\[[^=^][^=^]*\^=.*$/!d
+                        # remove all matches with combinators
+                        /^##.*[>+~ ].*/d
+                        # cleanup
+                        s/^##//g
+                        # convert dots
+                        s/\.\([^\.]\)/\\.\1/g
+                        # convert attribute based filter with startwith match
+                        s/^\[\([^^][^^]*\)^=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2\3.*\4/g
+                        s/$/|/
+                    ' "${html_file}" | sort -u | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        # using tr to merge lines because sed-based approaches take up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                        lines=("$line") # the entry that triggered the flush was not emitted yet, it starts the next chunk
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                    fi
                 fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-            fi
 
-            echo "FILTER: ${list}_attribute_endswith Tag filter of ${list}"
-            lines=()
-            # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
-            sed -e '
-                # only process gloabl classes
-                /^##\[[^$][^=$]*\$=.*$/!d
-                # remove all matches with combinators
-                /^##.*[>+~ ].*/d
-                # cleanup
-                s/^##//g
-                # convert dots
-                s/\.\([^\.]\)/\\.\1/g
-                # convert attribute based filter with endswith match
-                s/^\[\([^\$][^\$]*\)\$=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2.*\3\4/g
-                s/$/|/
-            ' "${html_file}" | sort -u | while read -r line; do
-                # number of matches within one rule impacts runtime of each request to modify the content
-                if [ "${#lines[@]}" -lt 1000 ]; then
-                    lines+=("$line")
-                    continue
+                if filter_active "attribute_global_endswith"; then
+                    echo "FILTER: ${list}_attribute_global_endswith Tag filter of ${list}"
+                    lines=()
+                    # using while-loop as privoxy cannot handle more than 2000 or-connected strings within one regex
+                    sed -e '
+                        # only process global attribute selectors
+                        /^##\[[^$][^=$]*\$=.*$/!d
+                        # remove all matches with combinators
+                        /^##.*[>+~ ].*/d
+                        # cleanup
+                        s/^##//g
+                        # convert dots
+                        s/\.\([^\.]\)/\\.\1/g
+                        # convert attribute based filter with endswith match
+                        s/^\[\([^\$][^\$]*\)\$=\(["'"'"']*\)\(.*[^"'"'"']\)\(["'"'"']*\)\]/\1=\2.*\3\4/g
+                        s/$/|/
+                    ' "${html_file}" | sort -u | while read -r line; do
+                        # number of matches within one rule impacts runtime of each request to modify the content
+                        if [ "${#lines[@]}" -lt 1000 ]; then
+                            lines+=("$line")
+                            continue
+                        fi
+                        # complexity of regex impacts runtime of each request to modify the content
+                        # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
+                        # printf to inject both quoting characters " and '
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        # using tr to merge lines because sed-based approaches take up to 6 MB RAM and >10 seconds during testing
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        # printf to inject both quoting characters " and '
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                        lines=("$line") # the entry that triggered the flush was not emitted yet, it starts the next chunk
+                    done
+                    # process last chunk with less than 1000 entries
+                    if [ "${#lines[@]}" -gt 0 ]; then
+                        printf 's@<([a-zA-Z0-9]+)\\s+.*('
+                        printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
+                        printf ').*>.*<\/\\1[^>]*>@@g\n'
+                    fi
                 fi
-                # complexity of regex impacts runtime of each request to modify the content
-                # using removal of whole HTML tag as multiple matches with different classes in same element are not possible
-                # printf to inject both quoting characters " and '
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                # using tr to merge lines because sed-based approachs takes up to 6 MB RAM and >10 seconds during testing
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                # printf to inject both quoting characters " and '
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-                lines=()
-            done
-            # process last chunk with less than 1000 entries
-            if [ "${#lines[@]}" -gt 0 ]; then
-                printf 's@<([a-zA-Z0-9]+)\\s+.*('
-                printf '%s\n' "${lines[@]}" | sed '$ s/|//' | tr -d '\n'
-                printf ').*>.*<\/\\1[^>]*>@@g\n'
-            fi
-            shopt -u lastpipe
-        ) >> "${filterfile}"
-
-        debug 1 "... registering ${list}_attribute filters in actionfile ..."
-        (
-            echo "{ +filter{${list}_attribute_global_name_only} }"
-            echo "/"
-            echo "{ +filter{${list}_attribute_exact} }"
-            echo "/"
-            echo "{ +filter{${list}_attribute_contain} }"
-            echo "/"
-            echo "{ +filter{${list}_attribute_startswith} }"
-            echo "/"
-            echo "{ +filter{${list}_attribute_endswith} }"
-            echo "/"
-        ) >> "${actionfile}"
-        debug 1 "... registered ..."
-
-        # FIXME: add attribute handling with domains
-        # FIXME: add attribute handling with combinators
-        # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
+                shopt -u lastpipe
+            ) >> "${filterfile}"
+
+            debug 1 "... registering ${list}_attribute filters in actionfile ..."
+            (
+                if filter_active "attribute_global_name"; then
+                    echo "{ +filter{${list}_attribute_global_name} }"
+                    echo "/"
+                fi
+                if filter_active "attribute_global_exact"; then
+                    echo "{ +filter{${list}_attribute_global_exact} }"
+                    echo "/"
+                fi
+                if filter_active "attribute_global_contain"; then
+                    echo "{ +filter{${list}_attribute_global_contain} }"
+                    echo "/"
+                fi
+                if filter_active "attribute_global_startswith"; then
+                    echo "{ +filter{${list}_attribute_global_startswith} }"
+                    echo "/"
+                fi
+                if filter_active "attribute_global_endswith"; then
+                    echo "{ +filter{${list}_attribute_global_endswith} }"
+                    echo "/"
+                fi
+            ) >> "${actionfile}"
+            debug 1 "... registered ..."
+
+            # FIXME: add attribute handling with domains
+            # FIXME: add attribute handling with combinators
+            # FIXME: add combination of classes and attributes: ##.OUTBRAIN[data-widget-id^="FMS_REELD_"]
+        fi
 
         # create domain based whitelist
 
@@ -690,16 +749,16 @@ function remove() {
 VERBOSE=()
 method="main"
 OS="$(uname)"
+OPT_FILTERS=()
 
 # loop for options
-while getopts ":c:hrqv:V" opt; do
+while getopts ":c:f:hrqv:V" opt; do
     case "${opt}" in
         "c")
             SCRIPTCONF="${OPTARG}"
             ;;
-        "v")
-            OPT_DBG="${OPTARG}"
-            VERBOSE=("-v")
+        "f")
+            OPT_FILTERS+=("${OPTARG,,}")
             ;;
         "q")
             OPT_DBG=-1
@@ -707,6 +766,10 @@ while getopts ":c:hrqv:V" opt; do
         "r")
             method="remove"
             ;;
+        "v")
+            OPT_DBG="${OPTARG}"
+            VERBOSE=("-v")
+            ;;
         "V")
             # <main> is replaced by release process
             echo "Version: <main>"
@@ -723,6 +786,13 @@ while getopts ":c:hrqv:V" opt; do
     esac
 done
 
+# abort when any value given via -f is not an exact entry of FILTERTYPES
+if [[ -n "${OPT_FILTERS[*]}" ]] &&
+    unknown="$(printf '%s\n' "${OPT_FILTERS[@]}" | grep -vxFf <(printf '%s\n' "${FILTERTYPES[@]}"))"; then
+    error "Unknown filters: ${unknown}"
+    exit 1
+fi
+
 prepare
 
 trap 'rm -fr "${TMPDIR}";exit' INT TERM EXIT
@@ -731,6 +801,7 @@ lock
 debug 2 "URL-List: ${URLS[*]}"
 debug 2 "Privoxy-Configdir: ${PRIVOXY_DIR}"
 debug 2 "Temporary directory: ${TMPDIR}"
+debug 2 "Content filters: ${OPT_FILTERS[*]:-disabled}"
 "${method}"
 
 # restore default exit command
diff --git a/tests/conftest.py b/tests/conftest.py
index 2325574..901636b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -88,6 +88,26 @@ def webserver(httpserver) -> UrlParsed:
     return UrlParsed(httpserver.url_for("/"))
 
 
+@pytest.fixture(scope="module")
+def filtertypes() -> list[str]:
+    """Return the FILTERTYPES entries declared in privoxy-blocklist.sh."""
+    script = Path(__file__).parent.parent.joinpath("privoxy-blocklist.sh")
+    collected: list[str] = []
+    inside_array = False
+    with script.open("r", encoding="UTF-8") as handle:
+        for raw in handle.readlines():
+            is_header = raw.startswith("FILTERTYPES")
+            inside_array = inside_array or is_header
+            # skip everything above the array as well as its opening line
+            if is_header or not inside_array:
+                continue
+            # a line ending with ")" is taken as the end of the array
+            if raw.endswith(")\n"):
+                break
+            collected.append(raw.strip().strip('"'))
+    return collected
+
+
 @pytest.fixture(scope="module")
 def privoxy_blocklist() -> str:
     """Return the path to privoxy-blocklist.sh."""
diff --git a/tests/test_01_root_execute.py b/tests/test_01_root_execute.py
index 16b1b51..0d59fd7 100644
--- a/tests/test_01_root_execute.py
+++ b/tests/test_01_root_execute.py
@@ -48,9 +48,21 @@ def test_version_option(shell, tmp_path, privoxy_blocklist) -> None:
     assert ret.stdout == "Version: 0.0.1\n"
 
 
-def test_next_run(shell, privoxy_blocklist) -> None:
+def test_filter_check(shell, privoxy_blocklist) -> None:
+    """Run with an unsupported -f value and expect exit code 1 with an error on stderr."""
+    result = shell.run(privoxy_blocklist, "-f", "bla")
+    expected_error = "Unknown filters: bla"
+    assert result.returncode == 1
+    assert result.stdout == ""
+    assert expected_error in result.stderr.strip()
+
+
+def test_next_run(shell, privoxy_blocklist, filtertypes) -> None:
     """Test followup runs."""
-    ret_script = shell.run(privoxy_blocklist)
+    cmd = [privoxy_blocklist]
+    for filtertype in filtertypes:
+        cmd.extend(["-f", filtertype])
+    ret_script = shell.run(*cmd)
     assert ret_script.returncode == 0
     ret_privo = shell.run(
         "/usr/sbin/privoxy", "--no-daemon", "--config-test", "/etc/privoxy/config"

From 4ed83f9f5cc1c8838cf8fa9ba3ee25d85e4909f6 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Sun, 28 Jan 2024 23:04:24 +0100
Subject: [PATCH 10/11] update implementation status in README

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 README.md            | 21 ++++++++++++++++-----
 privoxy-blocklist.sh |  3 ++-
 2 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index c9bf04f..b6861ec 100644
--- a/README.md
+++ b/README.md
@@ -25,7 +25,6 @@ The following table shows features of AdBlock Plus filters and there status with
 
 | Feature | Type | Status | Test |
 | ------- | ---- | ------ | ---- |
-| `#$#` | CSS selector - Snippet filter | :question: | :question: |
 | `:-abp-contains()` | extended CSS selector | :question: | :question: |
 | `:-abp-has()` | extended CSS selector | :question: | :question: |
 | `:-abp-properties()` | extended CSS selector | :question: | :question: |
@@ -33,10 +32,22 @@ The following table shows features of AdBlock Plus filters and there status with
 | `\|…\|` | block exact domain matching including scheme | :question: | :question: |
 | `!…` | comments | :white_check_mark: | |
 | `csp=` | filter options | :question: | :question: |
-| `##…[…]` | CSS attribute selector | :question: | :question: |
-| `##` | CSS selector - Element hiding | :white_check_mark: | |
-| `#?#` | CSS selector - Element hiding emulation | :question: | :question: |
-| `#@#` | CSS selector - Element hiding exception | :question: | :question: |
+| `##.class` | global CSS attribute selector with matching for class | :white_check_mark: | :white_check_mark: |
+| `###id` | global CSS attribute selector with matching for id | :white_check_mark: | :white_check_mark: |
+| `##[attribute]` | global CSS attribute selector with matching for attribute-name | :white_check_mark: | :white_check_mark: |
+| `##[attribute=value]` | global CSS attribute selector with matching for attribute-value pair | :white_check_mark: | :white_check_mark: |
+| `##[attribute^=value]` | global CSS attribute selector with matching for attribute with value starting with | :white_check_mark: | :white_check_mark: |
+| `##[attribute$=value]` | global CSS attribute selector with matching for attribute with value ending with | :white_check_mark: | :white_check_mark: |
+| `##[attribute*=value]` | global CSS attribute selector with matching for attribute with value containing | :white_check_mark: | :white_check_mark: |
+| `##html-tag[attribute]` | global CSS attribute selector for html-tag with matching for attribute-name | :construction: | :construction: |
+| `##html-tag[attribute=value]` | global CSS attribute selector for html-tag with matching for attribute-value pair | :construction: | :construction: |
+| `##html-tag[attribute^=value]` | global CSS attribute selector for html-tag with matching for attribute with value starting with | :construction: | :construction: |
+| `##html-tag[attribute$=value]` | global CSS attribute selector for html-tag with matching for attribute with value ending with | :construction: | :construction: |
+| `##html-tag[attribute*=value]` | global CSS attribute selector for html-tag with matching for attribute with value containing | :construction: | :construction: |
+| `[…]#$#` | domain based CSS selector - Snippet filter | :question: | :question: |
+| `[…]##` | domain based CSS selector - Element hiding | :white_check_mark: | |
+| `[…]#?#` | domain based CSS selector - Element hiding emulation | :question: | :question: |
+| `[…]#@#` | domain based CSS selector - Element hiding exception | :question: | :question: |
 | `document` | filter options | :question: | :question: |
 | `~domain=` | filter options | :question: | :question: |
 | `domain=` | filter options | :question: | :question: |
diff --git a/privoxy-blocklist.sh b/privoxy-blocklist.sh
index 51d251c..6842fba 100755
--- a/privoxy-blocklist.sh
+++ b/privoxy-blocklist.sh
@@ -160,6 +160,8 @@ EOF
     if [ -n "${OPT_FILTERS[*]}" ]; then
         FILTERS=("${OPT_FILTERS[@]}")
     fi
+    debug 2 "Content filters: ${FILTERS[*]:-disabled}" # effective list: -f values override FILTERS, so report FILTERS
+
     # load privoxy config
     # shellcheck disable=SC1090
     if [[ -r "${INIT_CONF:-no-init-conf}" ]]; then
@@ -801,7 +803,6 @@ lock
 debug 2 "URL-List: ${URLS[*]}"
 debug 2 "Privoxy-Configdir: ${PRIVOXY_DIR}"
 debug 2 "Temporary directory: ${TMPDIR}"
-debug 2 "Content filters: ${OPT_FILTERS[*]:-disabled}"
 "${method}"
 
 # restore default exit command

From 9f874fcf0ba74b26ee16962643452560fe05f497 Mon Sep 17 00:00:00 2001
From: Andrwe Lord Weber <github@andrwe.org>
Date: Sun, 28 Jan 2024 23:16:56 +0100
Subject: [PATCH 11/11] update test configurations

Signed-off-by: Andrwe Lord Weber <github@andrwe.org>
---
 tests/configs/debugging.conf           | 5 +++++
 tests/configs/url_extended_config.conf | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/tests/configs/debugging.conf b/tests/configs/debugging.conf
index 05f13ef..ac9b183 100644
--- a/tests/configs/debugging.conf
+++ b/tests/configs/debugging.conf
@@ -7,6 +7,11 @@ URLS=(
   "https://easylist-downloads.adblockplus.org/easylist.txt"
 )
 
+# array of content filters to convert
+#   for supported values check: $0 -h
+#   empty by default to deactivate as content filters slow down privoxy a lot
+FILTERS=()
+
 # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP
 #INIT_CONF="/etc/conf.d/privoxy"
 
diff --git a/tests/configs/url_extended_config.conf b/tests/configs/url_extended_config.conf
index 236b1f8..65f9c9b 100644
--- a/tests/configs/url_extended_config.conf
+++ b/tests/configs/url_extended_config.conf
@@ -8,6 +8,11 @@ URLS=(
   "https://raw.githubusercontent.com/easylist/easylist/master/easylist/easylist_allowlist_general_hide.txt"
 )
 
+# array of content filters to convert
+#   for supported values check: $0 -h
+#   empty by default to deactivate as content filters slow down privoxy a lot
+FILTERS=()
+
 # config for privoxy initscript providing PRIVOXY_CONF, PRIVOXY_USER and PRIVOXY_GROUP
 #INIT_CONF="/etc/conf.d/privoxy"