From: Peter Hofmann Date: Sun, 30 Nov 2014 14:50:51 +0000 (+0100) Subject: Re-implement adblock as a web extension X-Git-Tag: v1.0.0~207 X-Git-Url: https://git.armaanb.net/?p=chorizo.git;a=commitdiff_plain;h=dae6061a900777aff7c4bfbddf2cef191743cfce Re-implement adblock as a web extension --- diff --git a/Makefile b/Makefile index 5dad98b..6d1c811 100644 --- a/Makefile +++ b/Makefile @@ -3,6 +3,9 @@ __NAME__ = lariza __NAME_UPPERCASE__ = `echo $(__NAME__) | sed 's/.*/\U&/'` __NAME_CAPITALIZED__ = `echo $(__NAME__) | sed 's/^./\U&\E/'` +.PHONY: all +all: $(__NAME__) we_adblock.so + $(__NAME__): browser.c $(CC) $(CFLAGS) $(LDFLAGS) \ -D__NAME__=\"$(__NAME__)\" \ @@ -11,5 +14,13 @@ $(__NAME__): browser.c -o $@ $< \ `pkg-config --cflags --libs gtk+-3.0 glib-2.0 webkit2gtk-4.0` +we_adblock.so: we_adblock.c + $(CC) $(CFLAGS) $(LDFLAGS) \ + -D__NAME__=\"$(__NAME__)\" \ + -D__NAME_UPPERCASE__=\"$(__NAME_UPPERCASE__)\" \ + -D__NAME_CAPITALIZED__=\"$(__NAME_CAPITALIZED__)\" \ + -shared -o $@ -fPIC $< \ + `pkg-config --cflags --libs glib-2.0 webkit2gtk-4.0` + clean: - rm -f $(__NAME__) + rm -f $(__NAME__) we_adblock.so diff --git a/README b/README index e105f89..b706069 100644 --- a/README +++ b/README @@ -17,6 +17,8 @@ Features: - Global content zoom - Cooperative instances using FIFOs - Support for Flash and Java + - Bundled web extensions: + - Adblock ============================================== @@ -30,9 +32,6 @@ http://blogs.igalia.com/carlosgc/2014/08/01/webkitgtk-2-5-1-good-bye-webkit1/ There's a number of issues on this branch: - - No adblock support. As stated in commit f1174ff, this has to be - implemented as a "WebKit2 web extension". Maybe do this in a - project of its own? - "View source" mode is not yet ported. This feature has been removed from WebKit2, so we have to invent some way to re-create this... @@ -302,6 +301,35 @@ won't be touched. Instead, the new file name will have a suffix such as ".1", ".2", ".3" and so on. 
+======================
+Bundled web extensions
+======================
+
+On startup, WebKit checks ~/.local/share/lariza/web_extensions for any
+.so files. See this blog post for further information on these
+extensions:
+
+http://blogs.igalia.com/carlosgc/2013/09/10/webkit2gtk-web-process-extensions/
+
+lariza comes with the following extensions:
+
+    we_adblock.so
+
+        Generic adblock. Reads patterns from the following file:
+
+            ~/.config/lariza/adblock.black
+
+        Each line can contain a regular expression. These expressions
+        match case-insensitive and partially, i.e. ".*foo.*" is the same
+        as ".*FOO.*" and you can use anchors like "^https?://...".
+
+        Empty lines and lines starting with "#" are ignored.
+
+Those bundled web extensions are automatically compiled when you run
+make. To use them, though, make sure to copy them to the directory
+mentioned above.
+
+
 ====================
 WebKit local storage
 ====================
@@ -334,3 +362,8 @@ API references:
   - http://webkitgtk.org/reference/webkit2gtk/stable/index.html
   - https://developer.gnome.org/gtk3/stable/index.html
   - https://developer.gnome.org/glib/stable/index.html
+
+Regular expressions supported by GRegex, you can use these in your
+adblock patterns:
+
+  - https://developer.gnome.org/glib/stable/glib-regex-syntax.html
diff --git a/we_adblock.c b/we_adblock.c
new file mode 100644
index 0000000..be4e7b7
--- /dev/null
+++ b/we_adblock.c
@@ -0,0 +1,101 @@
+#include <stdio.h>
+
+#include <glib.h>
+#include <webkit2/webkit-web-extension.h>
+
+
+/* Compiled blacklist patterns (GRegex *), in pattern-file order; filled
+ * once by adblock_load(). */
+static GSList *adblock_patterns = NULL;
+
+
+/* Read "adblock.black" from the __NAME__ subdirectory of the user config
+ * directory and compile every non-empty, non-comment line into a
+ * case-insensitive regular expression. If the file cannot be opened, the
+ * pattern list simply stays empty. */
+static void
+adblock_load(void)
+{
+    GRegex *re = NULL;
+    GError *err = NULL;
+    GIOChannel *channel = NULL;
+    gchar *path = NULL, *buf = NULL;
+
+    path = g_build_filename(g_get_user_config_dir(), __NAME__, "adblock.black",
+                            NULL);
+    channel = g_io_channel_new_file(path, "r", &err);
+    g_free(path);
+    if (channel == NULL)
+    {
+        /* Nothing to load; release the GError set by the failed open. */
+        g_clear_error(&err);
+        return;
+    }
+
+    while (g_io_channel_read_line(channel, &buf, NULL, NULL, NULL)
+           == G_IO_STATUS_NORMAL)
+    {
+        g_strstrip(buf);
+        /* Skip comments and blank lines: an empty pattern would match every
+         * URI and thereby block all requests. */
+        if (buf[0] != '#' && buf[0] != '\0')
+        {
+            /* GRegex matches unanchored by default, so no match options are
+             * needed for "partial" (substring) matching. */
+            re = g_regex_new(buf, G_REGEX_CASELESS | G_REGEX_OPTIMIZE, 0,
+                             &err);
+            if (re == NULL)
+            {
+                fprintf(stderr, __NAME__": Could not compile regex: %s\n", buf);
+                g_clear_error(&err);
+            }
+            else
+                adblock_patterns = g_slist_prepend(adblock_patterns, re);
+        }
+        g_free(buf);
+    }
+    /* Patterns were prepended above; restore file order. */
+    adblock_patterns = g_slist_reverse(adblock_patterns);
+
+    g_io_channel_shutdown(channel, FALSE, NULL);
+    g_io_channel_unref(channel);
+}
+
+/* "send-request" handler. Returns TRUE when the request URI matches any
+ * blacklist pattern (WebKit then drops the request), FALSE otherwise. */
+static gboolean
+web_page_send_request(WebKitWebPage *web_page, WebKitURIRequest *request,
+                      WebKitURIResponse *redirected_response, gpointer user_data)
+{
+    GSList *it;
+    const gchar *uri;
+
+    uri = webkit_uri_request_get_uri(request);
+
+    for (it = adblock_patterns; it != NULL; it = g_slist_next(it))
+    {
+        if (g_regex_match((GRegex *)(it->data), uri, 0, NULL))
+            return TRUE;
+    }
+
+    return FALSE;
+}
+
+/* "page-created" handler: hook the request filter into each new page. */
+static void
+web_page_created_callback(WebKitWebExtension *extension, WebKitWebPage *web_page,
+                          gpointer user_data)
+{
+    g_signal_connect(web_page, "send-request",
+                     G_CALLBACK(web_page_send_request), NULL);
+}
+
+/* Entry point looked up by WebKit when it loads the extension: load the
+ * patterns once, then watch for new pages. */
+G_MODULE_EXPORT void
+webkit_web_extension_initialize(WebKitWebExtension *extension)
+{
+    adblock_load();
+    g_signal_connect(extension, "page-created",
+                     G_CALLBACK(web_page_created_callback), NULL);
+}