annotate common/common.cpp @ 0:a4671277546c tip

created the repository for the thymian project
author ferencd
date Tue, 17 Aug 2021 11:19:54 +0200
parents
children
rev   line source
ferencd@0 1 #include "common.h"
ferencd@0 2
ferencd@0 3 #include <boost/tokenizer.hpp>
ferencd@0 4 #include <boost/algorithm/string/predicate.hpp>
ferencd@0 5 #include <boost/algorithm/string.hpp>
ferencd@0 6
ferencd@0 7 #include <fpaq0.h>
ferencd@0 8
ferencd@0 9 #include <cstdlib>
ferencd@0 10 #include <ctime>
ferencd@0 11 #include <algorithm>
ferencd@0 12 #include <cassert>
ferencd@0 13 #include <cstring>
ferencd@0 14 #include <cmath>
ferencd@0 15 #include <sstream>
ferencd@0 16 #include <iterator>
ferencd@0 17 #include <cctype>
ferencd@0 18
ferencd@0 19 // Should stay here, do not move before <algorithm> otherwise strange warnings will come
ferencd@0 20 #include <log.h>
ferencd@0 21
ferencd@0 22
ferencd@0 23 const std::string platform()
ferencd@0 24 {
ferencd@0 25 #ifdef __ANDROID__
ferencd@0 26 return HOSTT_ANDROID;
ferencd@0 27 #elif defined __linux__
ferencd@0 28 return HOSTT_LINUX;
ferencd@0 29 #elif defined _WIN32
ferencd@0 30 return HOSTT_WINDOWS;
ferencd@0 31 #else
ferencd@0 32 return HOSTT_UNKNOWN;
ferencd@0 33 #endif
ferencd@0 34 }
ferencd@0 35
ferencd@0 36 namespace unafrog { namespace utils {
ferencd@0 37
ferencd@0 38 namespace random {
ferencd@0 39
ferencd@0 40 std::string random_string( size_t length, unafrog::utils::random::random_string_class cls )
ferencd@0 41 {
ferencd@0 42 auto randchar = [cls]() -> char
ferencd@0 43 {
ferencd@0 44 auto charset = [cls]() -> std::string {
ferencd@0 45 switch (cls) {
ferencd@0 46 case unafrog::utils::random::random_string_class::RSC_DEC:
ferencd@0 47 return "0123456789";
ferencd@0 48 case unafrog::utils::random::random_string_class::RSC_HEX:
ferencd@0 49 return "0123456789abcdef";
ferencd@0 50 case unafrog::utils::random::random_string_class::RSC_ASC_DEC:
ferencd@0 51 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
ferencd@0 52 case unafrog::utils::random::random_string_class::RSC_B64:
ferencd@0 53 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/";
ferencd@0 54 case unafrog::utils::random::random_string_class::RSC_FULL:
ferencd@0 55 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ|!#$%&/()=?{[]}+\\-_.:,;'*^";
ferencd@0 56 }
ferencd@0 57 return "10";
ferencd@0 58 }();
ferencd@0 59
ferencd@0 60 const size_t max_index = (charset.length() - 1);
ferencd@0 61 return charset[ rand() % max_index ];
ferencd@0 62 };
ferencd@0 63 std::string str(length, 0);
ferencd@0 64 std::generate_n( str.begin(), length, randchar );
ferencd@0 65 return str;
ferencd@0 66 }
ferencd@0 67
ferencd@0 68 } //random
ferencd@0 69
namespace b62 {

// Digit alphabet: the value v (0..61) is written as base62_vals[v].
static const char base62_vals[] = "0123456789"
                                  "abcdefghijklmnopqrstuvwxyz"
                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Inverse of base62_vals, indexed by character code from 0 up to 'z' (122).
// Entries for characters outside the alphabet are 0, so callers must validate
// the character before trusting the looked-up value.
static const int base62_index[] = {
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
       0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,    0,    0,
       0,    0,    0,    0,    0, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,
    0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d,    0,    0,    0,    0,    0,
       0, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
    0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23,
};

/**
 * Reverses a NUL terminated C string in place.
 *
 * @param str the string to reverse; must not be null
 */
void strreverse_inplace (char *str)
{
    std::reverse(str, str + strlen(str));
}

/**
 * Encodes an unsigned 64 bit value using the digits 0-9, a-z, A-Z.
 *
 * @param val the value to encode
 * @return the base 62 representation, most significant digit first;
 *         zero is encoded as "0"
 */
std::string base62_encode (uint64_t val)
{
    std::string digits;

    // Digits come out least significant first and are flipped afterwards.
    do {
        digits.push_back(base62_vals[val % 62]);
        val /= 62;
    } while (val > 0);

    std::reverse(digits.begin(), digits.end());
    return digits;
}

/**
 * Decodes a base 62 string produced by base62_encode.
 *
 * @param str the digits to decode, most significant first
 * @return the decoded value; 0 for an empty string; (uint64_t)-1 when the
 *         input contains a character that is not an ASCII letter or digit
 */
uint64_t base62_decode (const std::string& str)
{
    const size_t table_size = sizeof(base62_index) / sizeof(base62_index[0]);
    uint64_t val = 0;

    for (const char c : str) {
        // Go through unsigned char: a negative char is undefined behaviour
        // for isalnum() and would index in front of the lookup table. The
        // size check also rejects locale specific alphanumerics above 'z'.
        const unsigned char uc = static_cast<unsigned char>(c);
        if (uc >= table_size || !isalnum(uc)) {
            return static_cast<uint64_t>(-1);
        }

        // Horner's scheme keeps the computation in exact integer arithmetic
        // instead of going through floating point powers of 62.
        val = val * 62 + static_cast<uint64_t>(base62_index[uc]);
    }

    return val;
}

} // b62
ferencd@0 148
/**
 * Returns an upper-cased copy of a string.
 *
 * Characters are converted one by one with toupper(), so the result follows
 * the current C locale.
 *
 * @param s the text to convert
 * @return a copy of @p s with every character passed through toupper()
 */
std::string to_upper(const std::string &s)
{
    std::string res = s;
    // toupper() is only defined for values representable as unsigned char
    // (or EOF); taking the character as unsigned char keeps bytes with the
    // high bit set from being passed as negative ints.
    std::transform(res.begin(), res.end(), res.begin(),
                   [](unsigned char ch) { return static_cast<char>(::toupper(ch)); });
    return res;
}
ferencd@0 155
ferencd@0 156 }} // unafrog::utils
ferencd@0 157
/**
 * Replaces every occurrence of a substring.
 *
 * The scan continues after each inserted replacement, so text introduced by
 * the replacement itself is never matched again.
 *
 * @param subject the text to work on (taken by value and returned modified)
 * @param search  the substring to look for; when empty, @p subject is
 *                returned unchanged
 * @param replace the text substituted for each occurrence
 * @return the text with all occurrences replaced
 */
static std::string replace(std::string subject, const std::string& search, const std::string& replace)
{
    // An empty needle matches at every position; without this guard the loop
    // below would never terminate.
    if (search.empty())
    {
        return subject;
    }

    size_t pos = 0;
    while ((pos = subject.find(search, pos)) != std::string::npos)
    {
        subject.replace(pos, search.length(), replace);
        pos += replace.length();
    }
    return subject;
}
ferencd@0 168
/**
 * Escapes the five XML/HTML special characters of a string in place.
 *
 * '&', '"', '\'', '<' and '>' are replaced by their named entities; every
 * other character is copied through unchanged.
 *
 * @param data the text to escape; overwritten with the escaped text
 */
static void encode(std::string& data)
{
    std::string escaped;
    escaped.reserve(data.size());

    for (const char ch : data)
    {
        if (ch == '&')
        {
            escaped += "&amp;";
        }
        else if (ch == '\"')
        {
            escaped += "&quot;";
        }
        else if (ch == '\'')
        {
            escaped += "&apos;";
        }
        else if (ch == '<')
        {
            escaped += "&lt;";
        }
        else if (ch == '>')
        {
            escaped += "&gt;";
        }
        else
        {
            escaped += ch;
        }
    }

    data.swap(escaped);
}
ferencd@0 187
ferencd@0 188 // forward declaration
ferencd@0 189 static int get_number(const std::string& s, size_t &i);
ferencd@0 190
ferencd@0 191 std::string unafrog::utils::sanitize_user_input(const std::string &s, bool remove_domains)
ferencd@0 192 {
ferencd@0 193 static auto html_tags = {"a", "abbr", "address", "area", "article",
ferencd@0 194 "aside", "audio", "b", "base", "bdi", "bdo",
ferencd@0 195 "blockquote", "body", "br", "button", "canvas", "caption",
ferencd@0 196 "cite", "code", "col", "colgroup", "data", "datalist",
ferencd@0 197 "dd", "del", "dfn", "div", "dl", "dt", "em", "embed", "fieldset",
ferencd@0 198 "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4",
ferencd@0 199 "h5", "h6", "head", "header", "hr", "html", "i", "iframe", "img",
ferencd@0 200 "input", "ins", "kbd", "keygen", "label", "legend", "li", "link",
ferencd@0 201 "main", "map", "mark", "meta", "meter", "nav", "noscript",
ferencd@0 202 "object", "ol", "optgroup", "option", "output", "p", "param",
ferencd@0 203 "pre", "progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s",
ferencd@0 204 "samp", "script", "section", "select", "small", "source", "span",
ferencd@0 205 "strong", "style", "sub", "sup", "table", "tbody", "td",
ferencd@0 206 "template", "textarea", "tfoot", "th", "thead", "time", "title",
ferencd@0 207 "tr", "track", "u", "ul", "var", "video", "wbr", "--#"};
ferencd@0 208
ferencd@0 209 static auto window_methods_js = {"window.alert", "window.atob", "window.blur", "window.btoa",
ferencd@0 210 "window.clearInterval", "window.clearTimeout", "window.close",
ferencd@0 211 "window.confirm", "window.createPopup", "window.focus", "window.getComputedStyle",
ferencd@0 212 "window.getSelection", "window.matchMedia", "window.moveBy", "window.moveTo",
ferencd@0 213 "window.open", "window.print", "window.prompt", "window.resizeBy",
ferencd@0 214 "window.resizeTo", "window.scroll", "window.scrollBy", "window.scrollTo",
ferencd@0 215 "window.setInterval", "window.setTimeout", "window.stop",
ferencd@0 216 // Should be the last one due to the way the history objects are being accessed
ferencd@0 217 "window." };
ferencd@0 218
ferencd@0 219 static auto history_methods_js = {"history.back", "history.forward", "history.go"};
ferencd@0 220
ferencd@0 221 static auto location_js = {"location.hash", "location.host", "location.hostname", "location.href", "location.origin",
ferencd@0 222 "location.pathname", "location.port", "location.protocol", "location.search",
ferencd@0 223 "location.assign", "location.reload", "location.replace" };
ferencd@0 224
ferencd@0 225 static auto document_js = {"document.activeElement", "document.addEventListener", "document.adoptNode",
ferencd@0 226 "document.anchors", "document.applets", "document.baseURI", "document.body",
ferencd@0 227 "document.close", "document.cookie", "document.createAttribute", "document.createComment",
ferencd@0 228 "document.createDocumentFragment", "document.createElement", "document.createTextNode",
ferencd@0 229 "document.doctype", "document.documentElement", "document.documentMode",
ferencd@0 230 "document.documentURI", "document.domain", "document.domConfig", "document.embeds",
ferencd@0 231 "document.forms", "document.getElementById", "document.getElementsByClassName",
ferencd@0 232 "document.getElementsByName", "document.getElementsByTagName", "document.hasFocus",
ferencd@0 233 "document.head", "document.images", "document.implementation", "document.importNode",
ferencd@0 234 "document.inputEncoding", "document.lastModified", "document.links",
ferencd@0 235 "document.normalize", "document.normalizeDocument", "document.open", "document.querySelector",
ferencd@0 236 "document.querySelectorAll", "document.readyState", "document.referrer",
ferencd@0 237 "document.removeEventListener", "document.renameNode", "document.scripts",
ferencd@0 238 "document.strictErrorChecking", "document.title", "document.URL", "document.write",
ferencd@0 239 "document.writeln"};
ferencd@0 240
ferencd@0 241 static auto js_events = {"onclick", "oncontextmenu", "ondblclick", "onmousedown", "onmouseenter", "onmouseleave",
ferencd@0 242 "onmousemove", "onmouseover", "onmouseout", "onmouseup", "onkeydown", "onkeypress", "onkeyup",
ferencd@0 243 "onabort", "onbeforeunload", "onerror", "onhashchange", "onload", "onpageshow", "onpagehide",
ferencd@0 244 "onresize", "onscroll", "onunload", "onblur", "onchange", "onfocus", "onfocusin", "onfocusout",
ferencd@0 245 "oninput", "oninvalid", "onreset", "onsearch", "onselect", "onsubmit", "ondrag", "ondragend",
ferencd@0 246 "ondragenter", "ondragleave", "ondragover", "ondragstart", "ondrop", "oncopy", "oncut",
ferencd@0 247 "onpaste", "onafterprint", "onbeforeprint", "onabort", "oncanplay", "oncanplaythrough",
ferencd@0 248 "ondurationchange", "onemptied", "onended", "onerror", "onloadeddata", "onloadedmetadata",
ferencd@0 249 "onloadstart", "onpause", "onplay", "onplaying", "onprogress", "onratechange", "onseeked",
ferencd@0 250 "onseeking", "onstalled", "onsuspend", "ontimeupdate", "onvolumechange", "onwaiting",
ferencd@0 251 "animationend", "animationiteration", "animationstart", "transitionend", "onerror",
ferencd@0 252 "onmessage", "onopen", "onmessage", "onmousewheel", "ononline", "onoffline", "onpopstate",
ferencd@0 253 "onshow", "onstorage", "ontoggle", "onwheel", "ontouchcancel", "ontouchend", "ontouchmove",
ferencd@0 254 "ontouchstart", "cancelable", "currentTarget", "defaultPrevented", "eventPhase",
ferencd@0 255 "isTrusted", "timeStamp", "preventDefault", "stopImmediatePropagation", "stopPropagation",
ferencd@0 256 "altKey", "clientX", "clientY", "ctrlKey", "metaKey", "pageX", "pageY", "relatedTarget",
ferencd@0 257 "screenX", "screenY", "shiftKey", "altKey", "ctrlKey", "charCode", "keyCode",
ferencd@0 258 "metaKey", "shiftKey", "newURL", "oldURL", "relatedTarget", "animationName", "elapsedTime",
ferencd@0 259 "propertyName", "elapsedTime", "deltaX", "deltaY", "deltaZ", "deltaMode" };
ferencd@0 260
ferencd@0 261 static auto js_globals = {"decodeURI","decodeURIComponent", "encodeURI", "encodeURIComponent",
ferencd@0 262 "eval", "isFinite", "isNaN", "Number", "parseFloat", "parseInt", "String", "unescape" };
ferencd@0 263
ferencd@0 264 static auto js_navigator = {"appCodeName", "appName", "appVersion", "cookieEnabled",
ferencd@0 265 "geolocation", "onLine", "userAgent" };
ferencd@0 266
ferencd@0 267 static auto toplevel_domains = {".academy",".accountant",".accountants",".cloud",".active",".actor",".adult",".aero",".agency",".airforce",
ferencd@0 268 ".apartments",".app",".archi",".army",".associates",".attorney",".auction",".audio",".autos",".band",".bar",
ferencd@0 269 ".bargains",".beer",".best",".bid",".bike",".bingo",".bio",".biz",".black",".blackfriday",".blog",".blue",
ferencd@0 270 ".boo",".boutique",".build",".builders",".business",".buzz",".cab",".camera",".camp",".cancerresearch",
ferencd@0 271 ".capital",".cards",".care",".career",".careers",".cash",".casino",".catering",".center",".ceo",".channel",
ferencd@0 272 ".chat",".cheap",".christmas",".church",".city",".claims",".cleaning",".click",".clinic",".clothing",
ferencd@0 273 ".club",".coach",".codes",".coffee",".college",".community",".company",".computer",".condos",".construction",
ferencd@0 274 ".consulting",".contractors",".cooking",".cool",".coop",".country",".coupons",".credit",".creditcard",
ferencd@0 275 ".cricket",".cruises",".dad",".dance",".date",".dating",".day",".deals",".degree",".delivery",".democrat",
ferencd@0 276 ".dental",".dentist",".design",".diamonds",".diet",".digital",".direct",".directory",".discount",".dog",
ferencd@0 277 ".domains",".download",".eat",".education",".email",".energy",".engineer",".engineering",".equipment",
ferencd@0 278 ".esq",".estate",".events",".exchange",".expert",".exposed",".express",".fail",".faith",".family",
ferencd@0 279 ".fans",".farm",".fashion",".pid",".finance",".financial",".fish",".fishing",".fit",".fitness",".flights",
ferencd@0 280 ".florist",".flowers",".fly",".foo",".football",".forsale",".foundation",".fund",".furniture",".fyi",
ferencd@0 281 ".gallery",".garden",".gift",".gifts",".gives",".glass",".global",".gold",".golf",".gop",".graphics",
ferencd@0 282 ".green",".gripe",".guide",".guitars",".guru",".healthcare",".help",".here",".hiphop",".hiv",".hockey",
ferencd@0 283 ".holdings",".holiday",".homes",".horse",".host",".hosting",".house",".how",".info",".ing",".ink",
ferencd@0 284 ".institute[59]",".insure",".international",".investments",".jewelry",".jobs",".kim",".kitchen",".land",
ferencd@0 285 ".lawyer",".lease",".legal",".lgbt",".life",".lighting",".limited",".limo",".link",".loan",".loans",
ferencd@0 286 ".lol",".lotto",".love",".luxe",".luxury",".management",".market",".marketing",".markets",".mba",".media",
ferencd@0 287 ".meet",".meme",".memorial",".men",".menu",".mobi",".moe",".money",".mortgage",".motorcycles",".mov",
ferencd@0 288 ".movie",".museum",".name",".navy",".network",".new",".news",".ngo",".ninja",".one",".ong",".onl",
ferencd@0 289 ".online",".ooo",".organic",".partners",".parts",".party",".pharmacy",".photo",".photography",".photos",
ferencd@0 290 ".physio",".pics",".pictures",".feedback",".pink",".pizza",".place",".plumbing",".plus",".poker",".porn",
ferencd@0 291 ".post",".press",".pro",".productions",".prof",".properties",".property",".qpon",".racing",".recipes",
ferencd@0 292 ".red",".rehab",".ren",".rent",".rentals",".repair",".report",".republican",".rest",".review",".reviews",
ferencd@0 293 ".rich",".rip",".rocks",".rodeo",".rsvp",".run",".sale",".school",".science",".services",".sex",".sexy",
ferencd@0 294 ".shoes",".show",".singles",".site",".soccer",".social",".software",".solar",".solutions",".space",
ferencd@0 295 ".studio",".style",".sucks",".supplies",".supply",".support",".surf",".surgery",".systems",".tattoo",
ferencd@0 296 ".tax",".taxi",".team",".tech",".technology",".tel",".tennis",".theater",".tips",".tires",".today",
ferencd@0 297 ".tools",".top",".tours",".town",".toys",".trade",".training",".travel",".university",".vacations",
ferencd@0 298 ".vet",".video",".villas",".vision",".vodka",".vote",".voting",".voyage",".wang",".watch",".webcam",
ferencd@0 299 ".website",".wed",".wedding",".whoswho",".wiki",".win",".wine",".work",".works",".world",".wtf",
ferencd@0 300 ".xxx",".xyz",".yoga",".zone",".maison",".abogado",".gratis",".futbol",".juegos",".soy",".tienda",
ferencd@0 301 ".uno",".viajes",".haus",".immobilien",".jetzt",".kaufen",".reise",".reisen",".schule",".versicherung",
ferencd@0 302 ".desi",".shiksha",".casa",".cafe",".immo",".moda",".voto",".bar",".bank",".coop",".enterprises",
ferencd@0 303 ".industries",".institute",".ltda",".pub",".realtor",".reit",".rest",".restaurant",".sarl",".ventures",
ferencd@0 304 ".capetown",".durban",".joburg",".asia",".krd",".nagoya",".okinawa",".ryukyu",".taipei",".tatar",".tokyo",
ferencd@0 305 ".yokohama",".alsace",".amsterdam",".barcelona",".bayern",".berlin",".brussels",".budapest",".bzh",
ferencd@0 306 ".cat",".cologne",".corsica",".cymru",".eus",".frl",".gal",".gent",".hamburg",".irish",".koeln",".london",
ferencd@0 307 ".madrid",".moscow",".nrw",".paris",".ruhr",".saarland",".scot",".tirol",".vlaanderen",".wales",".wien",
ferencd@0 308 ".zuerich",".miami",".nyc",".quebec",".vegas",".kiwi",".melbourne",".sydney",".lat",".rio",".allfinanz",
ferencd@0 309 ".android",".aquarelle",".axa",".barclays",".barclaycard",".bloomberg",".bmw",".bnl",".bnpparibas",".cal",
ferencd@0 310 ".caravan",".cern",".chrome",".citic",".crs",".cuisinella",".dnp",".dvag",".emerck",".everbank",".firmdale",
ferencd@0 311 ".flsmidth",".frogans",".gbiz",".gle",".globo",".gmail",".gmo",".gmx",".google",".hsbc",".ibm",".kred",
ferencd@0 312 ".lacaixa",".latrobe",".lds",".mango",".mini",".monash",".mormon",".neustar",".nexus",".nhk",".nico",".nra",
ferencd@0 313 ".otsuka",".ovh",".piaget",".pohl",".praxi",".prod",".pwc",".sandvikcoromant",".sca",".scb",".schmidt",".sohu",
ferencd@0 314 ".spiegel",".suzuki",".tui",".uol",".williamhill",".wme",".wtc",".yandex",".youtube",".com",".org",".net",
ferencd@0 315 ".int",".edu",".gov",".mil",".arpa",".ac",".ad",".ae",".af",".ag",".ai",".al",".am",".an",".ao",".aq",".ar",
ferencd@0 316 ".as",".at",".au",".aw",".ax",".az",".ba",".bb",".bd",".be",".bf",".bg",".bh",".bi",".bj",".bm",".bn",".bo",
ferencd@0 317 ".bq",".br",".bs",".bt",".bv",".bw",".by",".bz",".ca",".cc",".cd",".cf",".cg",".ch",".ci",".ck",".cl",".cm",
ferencd@0 318 ".cn",".co",".cr",".cu",".cv",".cw",".cx",".cy",".cz",".de",".dj",".dk",".dm",".do",".dz",".ec",".ee",".eg",
ferencd@0 319 ".eh",".er",".es",".et",".eu",".fi",".fj",".fk",".fm",".fo",".fr",".ga",".gb",".gd",".ge",".gf",".gg",".gh",
ferencd@0 320 ".gi",".gl",".gm",".gn",".gp",".gq",".gr",".gs",".gt",".gu",".gw",".gy",".hk",".hm",".hn",".hr",".ht",".hu",
ferencd@0 321 ".id",".ie",".il",".im",".in",".io",".iq",".ir",".is",".it",".je",".jm",".jo",".jp",".ke",".kg",".kh",".ki",
ferencd@0 322 ".km",".kn",".kp",".kr",".kw",".ky",".kz",".la",".lb",".lc",".li",".lk",".lr",".ls",".lt",".lu",".lv",".ly",
ferencd@0 323 ".ma",".mc",".md",".me",".mg",".mh",".mk",".ml",".mm",".mn",".mo",".mp",".mq",".mr",".ms",".mt",".mu",".mv",
ferencd@0 324 ".mw",".mx",".my",".mz",".na",".nc",".ne",".nf",".ng",".ni",".nl",".no",".np",".nr",".nu",".nz",".om",".pa",
ferencd@0 325 ".pe",".pf",".pg",".ph",".pk",".pl",".pm",".pn",".pr",".ps",".pt",".pw",".py",".qa",".re",".ro",".rs",".ru",
ferencd@0 326 ".rw",".sa",".sb",".sc",".sd",".se",".sg",".authenticator.cloudy.sh",".si",".sj",".sk",".sl",".sm",".sn",".so",".sr",".ss",".st",
ferencd@0 327 ".su",".sv",".sx",".sy",".sz",".tc",".td",".tf",".tg",".th",".tj",".tk",".tl",".tm",".tn",".to",".tp",".tr",
ferencd@0 328 ".tt",".tv",".tw",".tz",".ua",".ug",".uk",".us",".uy",".uz",".va",".vc",".ve",".vg",".vi",".vn",".vu",".wf",
ferencd@0 329 ".ws",".ye",".yt",".za",".zm",".zw"};
ferencd@0 330
ferencd@0 331
ferencd@0 332 static auto with_domains = {window_methods_js, history_methods_js, location_js, document_js, js_events, js_globals, js_navigator,toplevel_domains};
ferencd@0 333 static auto without_domains = {window_methods_js, history_methods_js, location_js, document_js, js_events, js_globals, js_navigator};
ferencd@0 334 static auto containers = remove_domains ? with_domains : without_domains;
ferencd@0 335
ferencd@0 336 std::string result = s;
ferencd@0 337
ferencd@0 338 // First run: HTML tags
ferencd@0 339 for(auto tag : html_tags)
ferencd@0 340 {
ferencd@0 341
ferencd@0 342 // Ddi we parse out all the garbage?
ferencd@0 343 if(result.empty())
ferencd@0 344 {
ferencd@0 345 break;
ferencd@0 346 }
ferencd@0 347
ferencd@0 348 // Zero: Standalone tags
ferencd@0 349 std::string open_tag = std::string("<") + tag + std::string(">");
ferencd@0 350 result = replace(result, open_tag, "");
ferencd@0 351 std::string close_tag = std::string("</") + tag + std::string(">");
ferencd@0 352 result = replace(result, close_tag, "");
ferencd@0 353
ferencd@0 354 // One: Tags which might have parameters, such as: <script language = "blabl">
ferencd@0 355 // Involves parsing out the entire tag
ferencd@0 356
ferencd@0 357 std::string endless_tag = std::string("<") + tag;
ferencd@0 358 std::size_t etpos = result.find(endless_tag);
ferencd@0 359 if( etpos != std::string::npos)
ferencd@0 360 {
ferencd@0 361 std::string tmps = result.substr(etpos);
ferencd@0 362 size_t i = 0; // i will point to the first character after the endless tag
ferencd@0 363 while(i < tmps.length())
ferencd@0 364 {
ferencd@0 365 // skip stuff on double quotes
ferencd@0 366 if(tmps[i] == '\"')
ferencd@0 367 {
ferencd@0 368 i ++;
ferencd@0 369 while(i < tmps.length())
ferencd@0 370 {
ferencd@0 371 // skip the escaped double quotes
ferencd@0 372 if(tmps[i] == '\\') i++;
ferencd@0 373 if(i < tmps.length() && tmps[i] == '\"') i++;
ferencd@0 374
ferencd@0 375 // advance
ferencd@0 376 if(i < tmps.length() ) i++;
ferencd@0 377 }
ferencd@0 378 }
ferencd@0 379
ferencd@0 380 if( i < tmps.length() )
ferencd@0 381 {
ferencd@0 382 if(tmps[i] == '>')
ferencd@0 383 {
ferencd@0 384 break;
ferencd@0 385 }
ferencd@0 386 i++;
ferencd@0 387 }
ferencd@0 388 }
ferencd@0 389
ferencd@0 390 i++;
ferencd@0 391
ferencd@0 392 if( i<tmps.length())
ferencd@0 393 {
ferencd@0 394 result = tmps.substr(i);
ferencd@0 395 }
ferencd@0 396 else
ferencd@0 397 {
ferencd@0 398 result.clear();
ferencd@0 399 }
ferencd@0 400 }
ferencd@0 401 }
ferencd@0 402
ferencd@0 403 // Second run: various javascript stuff
ferencd@0 404 for(auto container : containers)
ferencd@0 405 {
ferencd@0 406 for(auto tag : container)
ferencd@0 407 {
ferencd@0 408 result = replace(result, tag, "");
ferencd@0 409 }
ferencd@0 410 }
ferencd@0 411
ferencd@0 412 again:
ferencd@0 413 size_t i = 0;
ferencd@0 414 while(i < result.length())
ferencd@0 415 {
ferencd@0 416 // the first part
ferencd@0 417 if(isdigit(result[i]))
ferencd@0 418 {
ferencd@0 419 size_t save_i = i;
ferencd@0 420 int p1 = get_number(result, i);
ferencd@0 421 if(p1 == 2)
ferencd@0 422 {
ferencd@0 423 // skip the '.'
ferencd@0 424 i++;
ferencd@0 425 // get the next number
ferencd@0 426 int p2 = get_number(result, i);
ferencd@0 427 if(p2 == 2)
ferencd@0 428 {
ferencd@0 429 i++;
ferencd@0 430 int p3 = get_number(result, i);
ferencd@0 431 if(p3 == 2)
ferencd@0 432 {
ferencd@0 433 // skip '.'
ferencd@0 434 i++;
ferencd@0 435 int p4 = get_number(result, i);
ferencd@0 436 if(p4 == 1)
ferencd@0 437 {
ferencd@0 438 // now remove from result the section between save_i and i
ferencd@0 439 result = result.substr(0, save_i) + result.substr(i);
ferencd@0 440 goto again;
ferencd@0 441 }
ferencd@0 442 }
ferencd@0 443 }
ferencd@0 444 }
ferencd@0 445 else
ferencd@0 446 {
ferencd@0 447 continue;
ferencd@0 448 }
ferencd@0 449
ferencd@0 450 }
ferencd@0 451 else
ferencd@0 452 {
ferencd@0 453 // move to the next character
ferencd@0 454 i++;
ferencd@0 455 }
ferencd@0 456 }
ferencd@0 457
ferencd@0 458 // Third run: Encode the remaining XML/HTML artifacts into web-safe data
ferencd@0 459 encode(result);
ferencd@0 460 return result;
ferencd@0 461 }
ferencd@0 462
/**
 * Reads one decimal component of a dotted IP address.
 *
 * Consumes the run of decimal digits starting at s[i] and leaves @p i on the
 * first character after that run (a following '.' is not consumed).
 *
 * NOTE(review): an empty digit run is treated as the value 0, exactly as in
 * the previous atoi() based implementation, so "1..2" style input still
 * counts as numbers - confirm whether that is wanted.
 *
 * @param s the text being scanned
 * @param i in: index of the first character to read; out: index of the first
 *          character after the digits
 * @return 0 - not a valid IP number (greater than 255),
 *         1 - valid, but not followed by '.',
 *         2 - valid IP number, followed by '.'
 */
static int get_number(const std::string& s, size_t& i)
{
    int value = 0;
    while(i < s.length() && isdigit(static_cast<unsigned char>(s[i])))
    {
        // Stop accumulating once the value is out of range: an arbitrarily
        // long digit run must not overflow (atoi() has undefined behaviour
        // there and could let an oversized number pass as valid).
        if(value <= 255)
        {
            value = value * 10 + (s[i] - '0');
        }
        ++i;
    }

    // is this a valid IP part?
    if(value > 255)
    {
        return 0;
    }

    if(i < s.length() && s[i] == '.')
    {
        return 2;
    }
    return 1;
}
ferencd@0 485
/**
 * Compares two characters ignoring case.
 *
 * @return true when both characters have the same toupper() mapping
 */
static bool case_insensitive_eq(const char &lhs, const char &rhs)
{
    // toupper() is undefined for negative values, hence the unsigned char casts.
    return std::toupper(static_cast<unsigned char>(lhs)) == std::toupper(static_cast<unsigned char>(rhs));
}

/**
 * Compares two strings ignoring case.
 *
 * @return true when both strings have the same length and every pair of
 *         characters matches according to case_insensitive_eq
 */
static bool case_insensitive_string_eq(const std::string &lhs, const std::string &rhs)
{
    // The three-iterator std::equal walks rhs for lhs.size() elements, so the
    // lengths have to agree first: otherwise a shorter rhs is read past its
    // end and a longer rhs is accepted on a mere prefix match.
    if(lhs.size() != rhs.size())
    {
        return false;
    }

    return std::equal(lhs.begin(),
                      lhs.end(),
                      rhs.begin(),
                      case_insensitive_eq);
}
ferencd@0 498
ferencd@0 499 case_insensitive_str_eq::case_insensitive_str_eq(std::string key) : key_(std::move(key)) {}
ferencd@0 500
ferencd@0 501 bool case_insensitive_str_eq::operator()(const std::string &item) const
ferencd@0 502 {
ferencd@0 503 if(item.length() != key_.length())
ferencd@0 504 {
ferencd@0 505 return false;
ferencd@0 506 }
ferencd@0 507 return case_insensitive_string_eq(item, key_);
ferencd@0 508 }
ferencd@0 509
ferencd@0 510 std::string unafrog::server()
ferencd@0 511 {
ferencd@0 512 return "http://localhost:8838";
ferencd@0 513 }
ferencd@0 514
ferencd@0 515 std::string unafrog::utils::consume(std::string& s, int c)
ferencd@0 516 {
ferencd@0 517 try
ferencd@0 518 {
ferencd@0 519 std::string v = s.substr(0, c);
ferencd@0 520 s.erase(0, c);
ferencd@0 521 return v;
ferencd@0 522 }
ferencd@0 523 catch (std::exception& ex)
ferencd@0 524 {
ferencd@0 525 log_err() << "Cannot consume" << c << "characters from" << s << ": " << ex.what();
ferencd@0 526 return "";
ferencd@0 527 }
ferencd@0 528 catch(...)
ferencd@0 529 {
ferencd@0 530 std::stringstream ss;
ferencd@0 531 ss << "Cannot consume" << c << "characters from" << s;
ferencd@0 532 log_err() << ss.str();
ferencd@0 533 return "";
ferencd@0 534 }
ferencd@0 535 }
ferencd@0 536
ferencd@0 537 std::vector<uint8_t> unafrog::utils::hex_string_to_vector(const std::string &s)
ferencd@0 538 {
ferencd@0 539 std::vector<uint8_t> out;
ferencd@0 540 for (unsigned int i = 0; i < s.length(); i += 2)
ferencd@0 541 {
ferencd@0 542 std::string byteString = s.substr(i, 2);
ferencd@0 543 uint8_t byte = static_cast<uint8_t>(strtol(byteString.c_str(), NULL, 16));
ferencd@0 544 out.push_back(byte);
ferencd@0 545 }
ferencd@0 546 return out;
ferencd@0 547 }
ferencd@0 548
ferencd@0 549 std::string unafrog::utils::string_to_hex(const std::string& input)
ferencd@0 550 {
ferencd@0 551 static const char* const lut = "0123456789abcdef";
ferencd@0 552 size_t len = input.length();
ferencd@0 553
ferencd@0 554 std::string output;
ferencd@0 555 output.reserve(2 * len);
ferencd@0 556 for (size_t i = 0; i < len; ++i)
ferencd@0 557 {
ferencd@0 558 const unsigned char c = input[i];
ferencd@0 559 output.push_back(lut[c >> 4]);
ferencd@0 560 output.push_back(lut[c & 15]);
ferencd@0 561 }
ferencd@0 562 return output;
ferencd@0 563 }
ferencd@0 564
ferencd@0 565 std::string unafrog::utils::hex_to_string(const std::string& input)
ferencd@0 566 {
ferencd@0 567 static const char* const lut = "0123456789abcdef";
ferencd@0 568 size_t len = input.length();
ferencd@0 569 if (len & 1)
ferencd@0 570 {
ferencd@0 571 log_err() << "odd length for " + input;
ferencd@0 572 throw std::exception();
ferencd@0 573 }
ferencd@0 574
ferencd@0 575 std::string output;
ferencd@0 576 output.reserve(len / 2);
ferencd@0 577 for (size_t i = 0; i < len; i += 2)
ferencd@0 578 {
ferencd@0 579 char a = input[i];
ferencd@0 580 const char* p = std::lower_bound(lut, lut + 16, a);
ferencd@0 581 if (*p != a)
ferencd@0 582 {
ferencd@0 583 log_err() << "not a hex digit";
ferencd@0 584 throw std::exception();
ferencd@0 585 }
ferencd@0 586
ferencd@0 587 char b = input[i + 1];
ferencd@0 588 const char* q = std::lower_bound(lut, lut + 16, b);
ferencd@0 589 if (*q != b)
ferencd@0 590 {
ferencd@0 591 log_err() << "not a hex digit in: " + input;
ferencd@0 592 throw std::exception();
ferencd@0 593 }
ferencd@0 594
ferencd@0 595 output.push_back(((p - lut) << 4) | (q - lut));
ferencd@0 596 }
ferencd@0 597 return output;
ferencd@0 598 }
ferencd@0 599
ferencd@0 600 std::string unafrog::utils::grow(const std::string &s, std::size_t required_length)
ferencd@0 601 {
ferencd@0 602 std::string result = s;
ferencd@0 603 while(result.length() < required_length)
ferencd@0 604 {
ferencd@0 605 result = "0" + result;
ferencd@0 606 }
ferencd@0 607 return result;
ferencd@0 608 }
ferencd@0 609
/**
 * Concatenates the strings of a vector, writing the delimiter after each one.
 *
 * NOTE(review): the delimiter is appended after EVERY element, including the
 * last, so join({"a","b"}, ",") yields "a,b,". Kept as is because callers may
 * rely on it - confirm whether the trailing delimiter is intended.
 *
 * @param vec   the strings to concatenate
 * @param delim text written after each element; nothing is written when null
 * @return the concatenation, empty for an empty vector
 */
std::string join(const std::vector<std::string> &vec, const char *delim)
{
    std::string joined;
    for (const std::string &piece : vec)
    {
        joined += piece;
        if (delim != nullptr)
        {
            joined += delim;
        }
    }
    return joined;
}
ferencd@0 616
ferencd@0 617 #if !defined __ANDROID__
ferencd@0 618 std::vector<std::string> split(const std::string &s, const char* delim)
ferencd@0 619 {
ferencd@0 620 std::vector<std::string> result;
ferencd@0 621 boost::char_separator<char> sep(delim);
ferencd@0 622 boost::tokenizer<boost::char_separator<char>> tok(s, sep);
ferencd@0 623
ferencd@0 624 for(boost::tokenizer<boost::char_separator<char>>::iterator beg=tok.begin(); beg!=tok.end(); ++beg)
ferencd@0 625 {
ferencd@0 626 result.push_back(*beg);
ferencd@0 627 }
ferencd@0 628
ferencd@0 629 return result;
ferencd@0 630 }
ferencd@0 631 #endif
ferencd@0 632
/**
 * Returns the numeric status code used for an internal server error.
 *
 * @return 500 (HTTP "Internal Server Error")
 */
unsigned internalServerError()
{
    const unsigned http_internal_server_error = 500;
    return http_internal_server_error;
}
ferencd@0 637
/**
 * Collapses every run of a given character into a single occurrence.
 *
 * Only consecutive repetitions of @p to_remove are affected; repetitions of
 * any other character are kept.
 *
 * @param s         the text to process
 * @param to_remove the character whose consecutive duplicates are dropped
 * @return the text with each run of @p to_remove reduced to one character
 */
std::string remove_duplicates(std::string s, char to_remove)
{
    std::string collapsed;

    for (const char ch : s)
    {
        // A character is a duplicate when it is the target character and the
        // last character kept so far is the target character as well.
        const bool is_duplicate = ch == to_remove
                               && !collapsed.empty()
                               && collapsed.back() == to_remove;
        if (!is_duplicate)
        {
            collapsed += ch;
        }
    }

    return collapsed;
}
ferencd@0 654
ferencd@0 655 std::string unafrog::utils::sanitize_hostname_web(std::string hn)
ferencd@0 656 {
ferencd@0 657 std::vector<std::pair<char, char>> replacements{
ferencd@0 658 { '-', '_' },
ferencd@0 659 { '.', '_' }
ferencd@0 660 };
ferencd@0 661
ferencd@0 662 for (auto const &r : replacements) {
ferencd@0 663 hn.replace(hn.begin(), hn.end(), r.first, r.second);
ferencd@0 664 }
ferencd@0 665
ferencd@0 666 return hn;
ferencd@0 667 }
ferencd@0 668
ferencd@0 669 void remove_quotes(std::string &s)
ferencd@0 670 {
ferencd@0 671 boost::trim_if(s, boost::is_any_of("\""));
ferencd@0 672 }