Mercurial > thymian
comparison common/common.cpp @ 0:a4671277546c tip
created the repository for the thymian project
| author | ferencd |
|---|---|
| date | Tue, 17 Aug 2021 11:19:54 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a4671277546c |
|---|---|
| 1 #include "common.h" | |
| 2 | |
| 3 #include <boost/tokenizer.hpp> | |
| 4 #include <boost/algorithm/string/predicate.hpp> | |
| 5 #include <boost/algorithm/string.hpp> | |
| 6 | |
| 7 #include <fpaq0.h> | |
| 8 | |
| 9 #include <cstdlib> | |
| 10 #include <ctime> | |
| 11 #include <algorithm> | |
| 12 #include <cassert> | |
| 13 #include <cstring> | |
| 14 #include <cmath> | |
| 15 #include <sstream> | |
| 16 #include <iterator> | |
| 17 #include <cctype> | |
| 18 | |
| 19 // Should stay here, do not move before <algorithm> otherwise strange warnings will come | |
| 20 #include <log.h> | |
| 21 | |
| 22 | |
| 23 const std::string platform() | |
| 24 { | |
| 25 #ifdef __ANDROID__ | |
| 26 return HOSTT_ANDROID; | |
| 27 #elif defined __linux__ | |
| 28 return HOSTT_LINUX; | |
| 29 #elif defined _WIN32 | |
| 30 return HOSTT_WINDOWS; | |
| 31 #else | |
| 32 return HOSTT_UNKNOWN; | |
| 33 #endif | |
| 34 } | |
| 35 | |
| 36 namespace unafrog { namespace utils { | |
| 37 | |
| 38 namespace random { | |
| 39 | |
| 40 std::string random_string( size_t length, unafrog::utils::random::random_string_class cls ) | |
| 41 { | |
| 42 auto randchar = [cls]() -> char | |
| 43 { | |
| 44 auto charset = [cls]() -> std::string { | |
| 45 switch (cls) { | |
| 46 case unafrog::utils::random::random_string_class::RSC_DEC: | |
| 47 return "0123456789"; | |
| 48 case unafrog::utils::random::random_string_class::RSC_HEX: | |
| 49 return "0123456789abcdef"; | |
| 50 case unafrog::utils::random::random_string_class::RSC_ASC_DEC: | |
| 51 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"; | |
| 52 case unafrog::utils::random::random_string_class::RSC_B64: | |
| 53 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ+/"; | |
| 54 case unafrog::utils::random::random_string_class::RSC_FULL: | |
| 55 return "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ|!#$%&/()=?{[]}+\\-_.:,;'*^"; | |
| 56 } | |
| 57 return "10"; | |
| 58 }(); | |
| 59 | |
| 60 const size_t max_index = (charset.length() - 1); | |
| 61 return charset[ rand() % max_index ]; | |
| 62 }; | |
| 63 std::string str(length, 0); | |
| 64 std::generate_n( str.begin(), length, randchar ); | |
| 65 return str; | |
| 66 } | |
| 67 | |
| 68 } //random | |
| 69 | |
namespace b62 {

// Digit alphabet, indexed by digit value: 0-9, then a-z (10..35), then A-Z (36..61).
static const char base62_vals[] = "0123456789"
                                  "abcdefghijklmnopqrstuvwxyz"
                                  "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

// Reverse lookup, indexed by character code 0..'z' (123 entries): the digit
// value of each alphanumeric character, 0 for everything else.
static const int base62_index[] = {
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0,    0,
    0,    0,    0,    0,    0,    0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a,
    0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36,
    0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0,    0,    0,    0,    0,
    0,    0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14,
    0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20,
    0x21, 0x22, 0x23,
};

/// Reverses the NUL-terminated string `str` in place. A null pointer is ignored.
void strreverse_inplace (char *str)
{
    if (str == nullptr) {
        return;
    }
    std::reverse(str, str + strlen(str));
}

/// Encodes `val` as a base-62 number using the alphabet in `base62_vals`.
///
/// @return the most-significant-digit-first representation; "0" for zero.
std::string base62_encode (uint64_t val)
{
    std::string digits;

    // Digits are produced least significant first and reversed at the end.
    do {
        digits.push_back(base62_vals[val % 62]);
        val /= 62;
    } while (val > 0);

    std::reverse(digits.begin(), digits.end());
    return digits;
}

/// Decodes a base-62 number written with the alphabet in `base62_vals`.
///
/// @return the decoded value; 0 for an empty string; static_cast<uint64_t>(-1)
///         when `str` contains a character that is not an ASCII letter or
///         digit. Values that do not fit in 64 bits wrap around.
uint64_t base62_decode (const std::string& str)
{
    const size_t table_size = sizeof(base62_index) / sizeof(base62_index[0]);
    uint64_t val = 0;

    for (const char ch : str) {
        // Go through unsigned char: isalnum() is undefined for negative
        // values, and the table only covers codes below table_size.
        const unsigned char code = static_cast<unsigned char>(ch);
        if (code >= table_size || !isalnum(code)) {
            return static_cast<uint64_t>(-1);
        }
        // Pure integer arithmetic (Horner scheme) instead of powl(): no
        // rounding and no out-of-range floating point to integer conversion.
        val = val * 62 + static_cast<uint64_t>(base62_index[code]);
    }

    return val;
}

} // b62
| 148 | |
/// Returns a copy of `s` with every character converted by std::toupper
/// (current C locale).
std::string to_upper(const std::string &s)
{
    std::string res(s.size(), '\0');
    // The lambda takes unsigned char on purpose: handing a negative plain
    // char (any byte >= 0x80 where char is signed) to toupper() is undefined.
    std::transform(s.begin(), s.end(), res.begin(),
                   [](unsigned char ch) { return static_cast<char>(std::toupper(ch)); });
    return res;
}
| 155 | |
| 156 }} // unafrog::utils | |
| 157 | |
/// Replaces every non-overlapping occurrence of `search` in `subject` with
/// `replace`, scanning left to right; replaced text is never rescanned.
///
/// @return the modified copy of `subject`; `subject` unchanged when `search`
///         is empty.
static std::string replace(std::string subject, const std::string& search, const std::string& replace)
{
    // An empty needle matches at every position and would never terminate.
    if (search.empty())
    {
        return subject;
    }

    size_t pos = 0;
    while ((pos = subject.find(search, pos)) != std::string::npos)
    {
        subject.replace(pos, search.length(), replace);
        // Continue after the inserted text so it is not matched again.
        pos += replace.length();
    }
    return subject;
}
| 168 | |
/// Escapes the five XML/HTML special characters of `data` in place:
/// & -> &amp;   " -> &quot;   ' -> &apos;   < -> &lt;   > -> &gt;
/// Every other character is copied unchanged.
static void encode(std::string& data)
{
    std::string buffer;
    buffer.reserve(data.size());
    for(const char ch : data)
    {
        switch(ch)
        {
        // The replacement literals must be spelled as entities; writing the
        // bare character here would make the function a no-op.
        case '&': buffer.append("&amp;"); break;
        case '\"': buffer.append("&quot;"); break;
        case '\'': buffer.append("&apos;"); break;
        case '<': buffer.append("&lt;"); break;
        case '>': buffer.append("&gt;"); break;
        default: buffer.push_back(ch); break;
        }
    }
    data.swap(buffer);
}
| 187 | |
| 188 // forward declaration | |
| 189 static int get_number(const std::string& s, size_t &i); | |
| 190 | |
| 191 std::string unafrog::utils::sanitize_user_input(const std::string &s, bool remove_domains) | |
| 192 { | |
| 193 static auto html_tags = {"a", "abbr", "address", "area", "article", | |
| 194 "aside", "audio", "b", "base", "bdi", "bdo", | |
| 195 "blockquote", "body", "br", "button", "canvas", "caption", | |
| 196 "cite", "code", "col", "colgroup", "data", "datalist", | |
| 197 "dd", "del", "dfn", "div", "dl", "dt", "em", "embed", "fieldset", | |
| 198 "figcaption", "figure", "footer", "form", "h1", "h2", "h3", "h4", | |
| 199 "h5", "h6", "head", "header", "hr", "html", "i", "iframe", "img", | |
| 200 "input", "ins", "kbd", "keygen", "label", "legend", "li", "link", | |
| 201 "main", "map", "mark", "meta", "meter", "nav", "noscript", | |
| 202 "object", "ol", "optgroup", "option", "output", "p", "param", | |
| 203 "pre", "progress", "q", "rb", "rp", "rt", "rtc", "ruby", "s", | |
| 204 "samp", "script", "section", "select", "small", "source", "span", | |
| 205 "strong", "style", "sub", "sup", "table", "tbody", "td", | |
| 206 "template", "textarea", "tfoot", "th", "thead", "time", "title", | |
| 207 "tr", "track", "u", "ul", "var", "video", "wbr", "--#"}; | |
| 208 | |
| 209 static auto window_methods_js = {"window.alert", "window.atob", "window.blur", "window.btoa", | |
| 210 "window.clearInterval", "window.clearTimeout", "window.close", | |
| 211 "window.confirm", "window.createPopup", "window.focus", "window.getComputedStyle", | |
| 212 "window.getSelection", "window.matchMedia", "window.moveBy", "window.moveTo", | |
| 213 "window.open", "window.print", "window.prompt", "window.resizeBy", | |
| 214 "window.resizeTo", "window.scroll", "window.scrollBy", "window.scrollTo", | |
| 215 "window.setInterval", "window.setTimeout", "window.stop", | |
| 216 // Should be the last one due to the way the history objects are being accessed | |
| 217 "window." }; | |
| 218 | |
| 219 static auto history_methods_js = {"history.back", "history.forward", "history.go"}; | |
| 220 | |
| 221 static auto location_js = {"location.hash", "location.host", "location.hostname", "location.href", "location.origin", | |
| 222 "location.pathname", "location.port", "location.protocol", "location.search", | |
| 223 "location.assign", "location.reload", "location.replace" }; | |
| 224 | |
| 225 static auto document_js = {"document.activeElement", "document.addEventListener", "document.adoptNode", | |
| 226 "document.anchors", "document.applets", "document.baseURI", "document.body", | |
| 227 "document.close", "document.cookie", "document.createAttribute", "document.createComment", | |
| 228 "document.createDocumentFragment", "document.createElement", "document.createTextNode", | |
| 229 "document.doctype", "document.documentElement", "document.documentMode", | |
| 230 "document.documentURI", "document.domain", "document.domConfig", "document.embeds", | |
| 231 "document.forms", "document.getElementById", "document.getElementsByClassName", | |
| 232 "document.getElementsByName", "document.getElementsByTagName", "document.hasFocus", | |
| 233 "document.head", "document.images", "document.implementation", "document.importNode", | |
| 234 "document.inputEncoding", "document.lastModified", "document.links", | |
| 235 "document.normalize", "document.normalizeDocument", "document.open", "document.querySelector", | |
| 236 "document.querySelectorAll", "document.readyState", "document.referrer", | |
| 237 "document.removeEventListener", "document.renameNode", "document.scripts", | |
| 238 "document.strictErrorChecking", "document.title", "document.URL", "document.write", | |
| 239 "document.writeln"}; | |
| 240 | |
| 241 static auto js_events = {"onclick", "oncontextmenu", "ondblclick", "onmousedown", "onmouseenter", "onmouseleave", | |
| 242 "onmousemove", "onmouseover", "onmouseout", "onmouseup", "onkeydown", "onkeypress", "onkeyup", | |
| 243 "onabort", "onbeforeunload", "onerror", "onhashchange", "onload", "onpageshow", "onpagehide", | |
| 244 "onresize", "onscroll", "onunload", "onblur", "onchange", "onfocus", "onfocusin", "onfocusout", | |
| 245 "oninput", "oninvalid", "onreset", "onsearch", "onselect", "onsubmit", "ondrag", "ondragend", | |
| 246 "ondragenter", "ondragleave", "ondragover", "ondragstart", "ondrop", "oncopy", "oncut", | |
| 247 "onpaste", "onafterprint", "onbeforeprint", "onabort", "oncanplay", "oncanplaythrough", | |
| 248 "ondurationchange", "onemptied", "onended", "onerror", "onloadeddata", "onloadedmetadata", | |
| 249 "onloadstart", "onpause", "onplay", "onplaying", "onprogress", "onratechange", "onseeked", | |
| 250 "onseeking", "onstalled", "onsuspend", "ontimeupdate", "onvolumechange", "onwaiting", | |
| 251 "animationend", "animationiteration", "animationstart", "transitionend", "onerror", | |
| 252 "onmessage", "onopen", "onmessage", "onmousewheel", "ononline", "onoffline", "onpopstate", | |
| 253 "onshow", "onstorage", "ontoggle", "onwheel", "ontouchcancel", "ontouchend", "ontouchmove", | |
| 254 "ontouchstart", "cancelable", "currentTarget", "defaultPrevented", "eventPhase", | |
| 255 "isTrusted", "timeStamp", "preventDefault", "stopImmediatePropagation", "stopPropagation", | |
| 256 "altKey", "clientX", "clientY", "ctrlKey", "metaKey", "pageX", "pageY", "relatedTarget", | |
| 257 "screenX", "screenY", "shiftKey", "altKey", "ctrlKey", "charCode", "keyCode", | |
| 258 "metaKey", "shiftKey", "newURL", "oldURL", "relatedTarget", "animationName", "elapsedTime", | |
| 259 "propertyName", "elapsedTime", "deltaX", "deltaY", "deltaZ", "deltaMode" }; | |
| 260 | |
| 261 static auto js_globals = {"decodeURI","decodeURIComponent", "encodeURI", "encodeURIComponent", | |
| 262 "eval", "isFinite", "isNaN", "Number", "parseFloat", "parseInt", "String", "unescape" }; | |
| 263 | |
| 264 static auto js_navigator = {"appCodeName", "appName", "appVersion", "cookieEnabled", | |
| 265 "geolocation", "onLine", "userAgent" }; | |
| 266 | |
| 267 static auto toplevel_domains = {".academy",".accountant",".accountants",".cloud",".active",".actor",".adult",".aero",".agency",".airforce", | |
| 268 ".apartments",".app",".archi",".army",".associates",".attorney",".auction",".audio",".autos",".band",".bar", | |
| 269 ".bargains",".beer",".best",".bid",".bike",".bingo",".bio",".biz",".black",".blackfriday",".blog",".blue", | |
| 270 ".boo",".boutique",".build",".builders",".business",".buzz",".cab",".camera",".camp",".cancerresearch", | |
| 271 ".capital",".cards",".care",".career",".careers",".cash",".casino",".catering",".center",".ceo",".channel", | |
| 272 ".chat",".cheap",".christmas",".church",".city",".claims",".cleaning",".click",".clinic",".clothing", | |
| 273 ".club",".coach",".codes",".coffee",".college",".community",".company",".computer",".condos",".construction", | |
| 274 ".consulting",".contractors",".cooking",".cool",".coop",".country",".coupons",".credit",".creditcard", | |
| 275 ".cricket",".cruises",".dad",".dance",".date",".dating",".day",".deals",".degree",".delivery",".democrat", | |
| 276 ".dental",".dentist",".design",".diamonds",".diet",".digital",".direct",".directory",".discount",".dog", | |
| 277 ".domains",".download",".eat",".education",".email",".energy",".engineer",".engineering",".equipment", | |
| 278 ".esq",".estate",".events",".exchange",".expert",".exposed",".express",".fail",".faith",".family", | |
| 279 ".fans",".farm",".fashion",".pid",".finance",".financial",".fish",".fishing",".fit",".fitness",".flights", | |
| 280 ".florist",".flowers",".fly",".foo",".football",".forsale",".foundation",".fund",".furniture",".fyi", | |
| 281 ".gallery",".garden",".gift",".gifts",".gives",".glass",".global",".gold",".golf",".gop",".graphics", | |
| 282 ".green",".gripe",".guide",".guitars",".guru",".healthcare",".help",".here",".hiphop",".hiv",".hockey", | |
| 283 ".holdings",".holiday",".homes",".horse",".host",".hosting",".house",".how",".info",".ing",".ink", | |
| 284 ".institute[59]",".insure",".international",".investments",".jewelry",".jobs",".kim",".kitchen",".land", | |
| 285 ".lawyer",".lease",".legal",".lgbt",".life",".lighting",".limited",".limo",".link",".loan",".loans", | |
| 286 ".lol",".lotto",".love",".luxe",".luxury",".management",".market",".marketing",".markets",".mba",".media", | |
| 287 ".meet",".meme",".memorial",".men",".menu",".mobi",".moe",".money",".mortgage",".motorcycles",".mov", | |
| 288 ".movie",".museum",".name",".navy",".network",".new",".news",".ngo",".ninja",".one",".ong",".onl", | |
| 289 ".online",".ooo",".organic",".partners",".parts",".party",".pharmacy",".photo",".photography",".photos", | |
| 290 ".physio",".pics",".pictures",".feedback",".pink",".pizza",".place",".plumbing",".plus",".poker",".porn", | |
| 291 ".post",".press",".pro",".productions",".prof",".properties",".property",".qpon",".racing",".recipes", | |
| 292 ".red",".rehab",".ren",".rent",".rentals",".repair",".report",".republican",".rest",".review",".reviews", | |
| 293 ".rich",".rip",".rocks",".rodeo",".rsvp",".run",".sale",".school",".science",".services",".sex",".sexy", | |
| 294 ".shoes",".show",".singles",".site",".soccer",".social",".software",".solar",".solutions",".space", | |
| 295 ".studio",".style",".sucks",".supplies",".supply",".support",".surf",".surgery",".systems",".tattoo", | |
| 296 ".tax",".taxi",".team",".tech",".technology",".tel",".tennis",".theater",".tips",".tires",".today", | |
| 297 ".tools",".top",".tours",".town",".toys",".trade",".training",".travel",".university",".vacations", | |
| 298 ".vet",".video",".villas",".vision",".vodka",".vote",".voting",".voyage",".wang",".watch",".webcam", | |
| 299 ".website",".wed",".wedding",".whoswho",".wiki",".win",".wine",".work",".works",".world",".wtf", | |
| 300 ".xxx",".xyz",".yoga",".zone",".maison",".abogado",".gratis",".futbol",".juegos",".soy",".tienda", | |
| 301 ".uno",".viajes",".haus",".immobilien",".jetzt",".kaufen",".reise",".reisen",".schule",".versicherung", | |
| 302 ".desi",".shiksha",".casa",".cafe",".immo",".moda",".voto",".bar",".bank",".coop",".enterprises", | |
| 303 ".industries",".institute",".ltda",".pub",".realtor",".reit",".rest",".restaurant",".sarl",".ventures", | |
| 304 ".capetown",".durban",".joburg",".asia",".krd",".nagoya",".okinawa",".ryukyu",".taipei",".tatar",".tokyo", | |
| 305 ".yokohama",".alsace",".amsterdam",".barcelona",".bayern",".berlin",".brussels",".budapest",".bzh", | |
| 306 ".cat",".cologne",".corsica",".cymru",".eus",".frl",".gal",".gent",".hamburg",".irish",".koeln",".london", | |
| 307 ".madrid",".moscow",".nrw",".paris",".ruhr",".saarland",".scot",".tirol",".vlaanderen",".wales",".wien", | |
| 308 ".zuerich",".miami",".nyc",".quebec",".vegas",".kiwi",".melbourne",".sydney",".lat",".rio",".allfinanz", | |
| 309 ".android",".aquarelle",".axa",".barclays",".barclaycard",".bloomberg",".bmw",".bnl",".bnpparibas",".cal", | |
| 310 ".caravan",".cern",".chrome",".citic",".crs",".cuisinella",".dnp",".dvag",".emerck",".everbank",".firmdale", | |
| 311 ".flsmidth",".frogans",".gbiz",".gle",".globo",".gmail",".gmo",".gmx",".google",".hsbc",".ibm",".kred", | |
| 312 ".lacaixa",".latrobe",".lds",".mango",".mini",".monash",".mormon",".neustar",".nexus",".nhk",".nico",".nra", | |
| 313 ".otsuka",".ovh",".piaget",".pohl",".praxi",".prod",".pwc",".sandvikcoromant",".sca",".scb",".schmidt",".sohu", | |
| 314 ".spiegel",".suzuki",".tui",".uol",".williamhill",".wme",".wtc",".yandex",".youtube",".com",".org",".net", | |
| 315 ".int",".edu",".gov",".mil",".arpa",".ac",".ad",".ae",".af",".ag",".ai",".al",".am",".an",".ao",".aq",".ar", | |
| 316 ".as",".at",".au",".aw",".ax",".az",".ba",".bb",".bd",".be",".bf",".bg",".bh",".bi",".bj",".bm",".bn",".bo", | |
| 317 ".bq",".br",".bs",".bt",".bv",".bw",".by",".bz",".ca",".cc",".cd",".cf",".cg",".ch",".ci",".ck",".cl",".cm", | |
| 318 ".cn",".co",".cr",".cu",".cv",".cw",".cx",".cy",".cz",".de",".dj",".dk",".dm",".do",".dz",".ec",".ee",".eg", | |
| 319 ".eh",".er",".es",".et",".eu",".fi",".fj",".fk",".fm",".fo",".fr",".ga",".gb",".gd",".ge",".gf",".gg",".gh", | |
| 320 ".gi",".gl",".gm",".gn",".gp",".gq",".gr",".gs",".gt",".gu",".gw",".gy",".hk",".hm",".hn",".hr",".ht",".hu", | |
| 321 ".id",".ie",".il",".im",".in",".io",".iq",".ir",".is",".it",".je",".jm",".jo",".jp",".ke",".kg",".kh",".ki", | |
| 322 ".km",".kn",".kp",".kr",".kw",".ky",".kz",".la",".lb",".lc",".li",".lk",".lr",".ls",".lt",".lu",".lv",".ly", | |
| 323 ".ma",".mc",".md",".me",".mg",".mh",".mk",".ml",".mm",".mn",".mo",".mp",".mq",".mr",".ms",".mt",".mu",".mv", | |
| 324 ".mw",".mx",".my",".mz",".na",".nc",".ne",".nf",".ng",".ni",".nl",".no",".np",".nr",".nu",".nz",".om",".pa", | |
| 325 ".pe",".pf",".pg",".ph",".pk",".pl",".pm",".pn",".pr",".ps",".pt",".pw",".py",".qa",".re",".ro",".rs",".ru", | |
| 326 ".rw",".sa",".sb",".sc",".sd",".se",".sg",".authenticator.cloudy.sh",".si",".sj",".sk",".sl",".sm",".sn",".so",".sr",".ss",".st", | |
| 327 ".su",".sv",".sx",".sy",".sz",".tc",".td",".tf",".tg",".th",".tj",".tk",".tl",".tm",".tn",".to",".tp",".tr", | |
| 328 ".tt",".tv",".tw",".tz",".ua",".ug",".uk",".us",".uy",".uz",".va",".vc",".ve",".vg",".vi",".vn",".vu",".wf", | |
| 329 ".ws",".ye",".yt",".za",".zm",".zw"}; | |
| 330 | |
| 331 | |
| 332 static auto with_domains = {window_methods_js, history_methods_js, location_js, document_js, js_events, js_globals, js_navigator,toplevel_domains}; | |
| 333 static auto without_domains = {window_methods_js, history_methods_js, location_js, document_js, js_events, js_globals, js_navigator}; | |
| 334 static auto containers = remove_domains ? with_domains : without_domains; | |
| 335 | |
| 336 std::string result = s; | |
| 337 | |
| 338 // First run: HTML tags | |
| 339 for(auto tag : html_tags) | |
| 340 { | |
| 341 | |
| 342 // Ddi we parse out all the garbage? | |
| 343 if(result.empty()) | |
| 344 { | |
| 345 break; | |
| 346 } | |
| 347 | |
| 348 // Zero: Standalone tags | |
| 349 std::string open_tag = std::string("<") + tag + std::string(">"); | |
| 350 result = replace(result, open_tag, ""); | |
| 351 std::string close_tag = std::string("</") + tag + std::string(">"); | |
| 352 result = replace(result, close_tag, ""); | |
| 353 | |
| 354 // One: Tags which might have parameters, such as: <script language = "blabl"> | |
| 355 // Involves parsing out the entire tag | |
| 356 | |
| 357 std::string endless_tag = std::string("<") + tag; | |
| 358 std::size_t etpos = result.find(endless_tag); | |
| 359 if( etpos != std::string::npos) | |
| 360 { | |
| 361 std::string tmps = result.substr(etpos); | |
| 362 size_t i = 0; // i will point to the first character after the endless tag | |
| 363 while(i < tmps.length()) | |
| 364 { | |
| 365 // skip stuff on double quotes | |
| 366 if(tmps[i] == '\"') | |
| 367 { | |
| 368 i ++; | |
| 369 while(i < tmps.length()) | |
| 370 { | |
| 371 // skip the escaped double quotes | |
| 372 if(tmps[i] == '\\') i++; | |
| 373 if(i < tmps.length() && tmps[i] == '\"') i++; | |
| 374 | |
| 375 // advance | |
| 376 if(i < tmps.length() ) i++; | |
| 377 } | |
| 378 } | |
| 379 | |
| 380 if( i < tmps.length() ) | |
| 381 { | |
| 382 if(tmps[i] == '>') | |
| 383 { | |
| 384 break; | |
| 385 } | |
| 386 i++; | |
| 387 } | |
| 388 } | |
| 389 | |
| 390 i++; | |
| 391 | |
| 392 if( i<tmps.length()) | |
| 393 { | |
| 394 result = tmps.substr(i); | |
| 395 } | |
| 396 else | |
| 397 { | |
| 398 result.clear(); | |
| 399 } | |
| 400 } | |
| 401 } | |
| 402 | |
| 403 // Second run: various javascript stuff | |
| 404 for(auto container : containers) | |
| 405 { | |
| 406 for(auto tag : container) | |
| 407 { | |
| 408 result = replace(result, tag, ""); | |
| 409 } | |
| 410 } | |
| 411 | |
| 412 again: | |
| 413 size_t i = 0; | |
| 414 while(i < result.length()) | |
| 415 { | |
| 416 // the first part | |
| 417 if(isdigit(result[i])) | |
| 418 { | |
| 419 size_t save_i = i; | |
| 420 int p1 = get_number(result, i); | |
| 421 if(p1 == 2) | |
| 422 { | |
| 423 // skip the '.' | |
| 424 i++; | |
| 425 // get the next number | |
| 426 int p2 = get_number(result, i); | |
| 427 if(p2 == 2) | |
| 428 { | |
| 429 i++; | |
| 430 int p3 = get_number(result, i); | |
| 431 if(p3 == 2) | |
| 432 { | |
| 433 // skip '.' | |
| 434 i++; | |
| 435 int p4 = get_number(result, i); | |
| 436 if(p4 == 1) | |
| 437 { | |
| 438 // now remove from result the section between save_i and i | |
| 439 result = result.substr(0, save_i) + result.substr(i); | |
| 440 goto again; | |
| 441 } | |
| 442 } | |
| 443 } | |
| 444 } | |
| 445 else | |
| 446 { | |
| 447 continue; | |
| 448 } | |
| 449 | |
| 450 } | |
| 451 else | |
| 452 { | |
| 453 // move to the next character | |
| 454 i++; | |
| 455 } | |
| 456 } | |
| 457 | |
| 458 // Third run: Encode the remaining XML/HTML artifacts into web-safe data | |
| 459 encode(result); | |
| 460 return result; | |
| 461 } | |
| 462 | |
| 463 // returns: 0 - not a valid IP number, 1 - is not followed by '.', 2 - valid IP number, followed by '.' | |
/// Reads a run of decimal digits from `s` starting at index `i` and classifies
/// it as one part of a dotted IP address. `i` is advanced past the digits
/// (never past a following '.').
///
/// @return 0 - no digits at `i`, or the value is greater than 255;
///         1 - a value in 0..255 that is not followed by '.';
///         2 - a value in 0..255 that is followed by '.'.
static int get_number(const std::string& s, size_t& i)
{
    const size_t start = i;
    int value = 0;
    while(i < s.length() && isdigit(static_cast<unsigned char>(s[i])))
    {
        // Stop accumulating once the value is out of range, so arbitrarily
        // long digit runs cannot overflow.
        if(value <= 255)
        {
            value = value * 10 + (s[i] - '0');
        }
        ++i;
    }

    // is this a valid IP part? (an empty run is not)
    if(i == start || value > 255)
    {
        return 0;
    }

    if(i < s.length() && s[i] == '.')
    {
        return 2;
    }
    return 1;
}
| 485 | |
/// Compares two characters ignoring case (std::toupper, current C locale).
static bool case_insensitive_eq(const char &lhs, const char &rhs)
{
    // Convert through unsigned char: toupper() is undefined for negative values.
    return std::toupper(static_cast<unsigned char>(lhs)) ==
           std::toupper(static_cast<unsigned char>(rhs));
}

/// Returns true when `lhs` and `rhs` have the same length and are equal
/// character by character, ignoring case.
static bool case_insensitive_string_eq(const std::string &lhs, const std::string &rhs)
{
    // The three-iterator std::equal assumes the second range is at least as
    // long as the first, so the lengths must be checked before comparing.
    return lhs.size() == rhs.size() &&
           std::equal(lhs.begin(), lhs.end(), rhs.begin(), case_insensitive_eq);
}
| 498 | |
| 499 case_insensitive_str_eq::case_insensitive_str_eq(std::string key) : key_(std::move(key)) {} | |
| 500 | |
| 501 bool case_insensitive_str_eq::operator()(const std::string &item) const | |
| 502 { | |
| 503 if(item.length() != key_.length()) | |
| 504 { | |
| 505 return false; | |
| 506 } | |
| 507 return case_insensitive_string_eq(item, key_); | |
| 508 } | |
| 509 | |
| 510 std::string unafrog::server() | |
| 511 { | |
| 512 return "http://localhost:8838"; | |
| 513 } | |
| 514 | |
| 515 std::string unafrog::utils::consume(std::string& s, int c) | |
| 516 { | |
| 517 try | |
| 518 { | |
| 519 std::string v = s.substr(0, c); | |
| 520 s.erase(0, c); | |
| 521 return v; | |
| 522 } | |
| 523 catch (std::exception& ex) | |
| 524 { | |
| 525 log_err() << "Cannot consume" << c << "characters from" << s << ": " << ex.what(); | |
| 526 return ""; | |
| 527 } | |
| 528 catch(...) | |
| 529 { | |
| 530 std::stringstream ss; | |
| 531 ss << "Cannot consume" << c << "characters from" << s; | |
| 532 log_err() << ss.str(); | |
| 533 return ""; | |
| 534 } | |
| 535 } | |
| 536 | |
| 537 std::vector<uint8_t> unafrog::utils::hex_string_to_vector(const std::string &s) | |
| 538 { | |
| 539 std::vector<uint8_t> out; | |
| 540 for (unsigned int i = 0; i < s.length(); i += 2) | |
| 541 { | |
| 542 std::string byteString = s.substr(i, 2); | |
| 543 uint8_t byte = static_cast<uint8_t>(strtol(byteString.c_str(), NULL, 16)); | |
| 544 out.push_back(byte); | |
| 545 } | |
| 546 return out; | |
| 547 } | |
| 548 | |
| 549 std::string unafrog::utils::string_to_hex(const std::string& input) | |
| 550 { | |
| 551 static const char* const lut = "0123456789abcdef"; | |
| 552 size_t len = input.length(); | |
| 553 | |
| 554 std::string output; | |
| 555 output.reserve(2 * len); | |
| 556 for (size_t i = 0; i < len; ++i) | |
| 557 { | |
| 558 const unsigned char c = input[i]; | |
| 559 output.push_back(lut[c >> 4]); | |
| 560 output.push_back(lut[c & 15]); | |
| 561 } | |
| 562 return output; | |
| 563 } | |
| 564 | |
| 565 std::string unafrog::utils::hex_to_string(const std::string& input) | |
| 566 { | |
| 567 static const char* const lut = "0123456789abcdef"; | |
| 568 size_t len = input.length(); | |
| 569 if (len & 1) | |
| 570 { | |
| 571 log_err() << "odd length for " + input; | |
| 572 throw std::exception(); | |
| 573 } | |
| 574 | |
| 575 std::string output; | |
| 576 output.reserve(len / 2); | |
| 577 for (size_t i = 0; i < len; i += 2) | |
| 578 { | |
| 579 char a = input[i]; | |
| 580 const char* p = std::lower_bound(lut, lut + 16, a); | |
| 581 if (*p != a) | |
| 582 { | |
| 583 log_err() << "not a hex digit"; | |
| 584 throw std::exception(); | |
| 585 } | |
| 586 | |
| 587 char b = input[i + 1]; | |
| 588 const char* q = std::lower_bound(lut, lut + 16, b); | |
| 589 if (*q != b) | |
| 590 { | |
| 591 log_err() << "not a hex digit in: " + input; | |
| 592 throw std::exception(); | |
| 593 } | |
| 594 | |
| 595 output.push_back(((p - lut) << 4) | (q - lut)); | |
| 596 } | |
| 597 return output; | |
| 598 } | |
| 599 | |
| 600 std::string unafrog::utils::grow(const std::string &s, std::size_t required_length) | |
| 601 { | |
| 602 std::string result = s; | |
| 603 while(result.length() < required_length) | |
| 604 { | |
| 605 result = "0" + result; | |
| 606 } | |
| 607 return result; | |
| 608 } | |
| 609 | |
/// Concatenates the elements of `vec`, writing `delim` after EVERY element,
/// including the last one (so the result ends with the delimiter). A null
/// `delim` means no delimiter at all.
std::string join(const std::vector<std::string> &vec, const char *delim)
{
    std::string joined;
    for (const std::string& piece : vec)
    {
        joined += piece;
        if (delim != nullptr)
        {
            joined += delim;
        }
    }
    return joined;
}
| 616 | |
| 617 #if !defined __ANDROID__ | |
| 618 std::vector<std::string> split(const std::string &s, const char* delim) | |
| 619 { | |
| 620 std::vector<std::string> result; | |
| 621 boost::char_separator<char> sep(delim); | |
| 622 boost::tokenizer<boost::char_separator<char>> tok(s, sep); | |
| 623 | |
| 624 for(boost::tokenizer<boost::char_separator<char>>::iterator beg=tok.begin(); beg!=tok.end(); ++beg) | |
| 625 { | |
| 626 result.push_back(*beg); | |
| 627 } | |
| 628 | |
| 629 return result; | |
| 630 } | |
| 631 #endif | |
| 632 | |
/// Returns the numeric HTTP status code for "Internal Server Error".
unsigned internalServerError()
{
    const unsigned http_internal_server_error = 500;
    return http_internal_server_error;
}
| 637 | |
/// Collapses every run of consecutive `to_remove` characters in `s` into a
/// single one; all other characters (repeated or not) are kept as they are.
std::string remove_duplicates(std::string s, char to_remove)
{
    std::string collapsed;
    // Two neighbours count as "equal" only when both are `to_remove`, so
    // unique_copy drops exactly the repeats of that one character.
    std::unique_copy(s.begin(), s.end(), std::back_inserter(collapsed),
                     [to_remove](char previous, char current) {
                         return current == previous && current == to_remove;
                     });
    return collapsed;
}
| 654 | |
| 655 std::string unafrog::utils::sanitize_hostname_web(std::string hn) | |
| 656 { | |
| 657 std::vector<std::pair<char, char>> replacements{ | |
| 658 { '-', '_' }, | |
| 659 { '.', '_' } | |
| 660 }; | |
| 661 | |
| 662 for (auto const &r : replacements) { | |
| 663 hn.replace(hn.begin(), hn.end(), r.first, r.second); | |
| 664 } | |
| 665 | |
| 666 return hn; | |
| 667 } | |
| 668 | |
/// Strips all leading and trailing double quote characters from `s` in place.
/// Quotes in the middle of the string are kept.
void remove_quotes(std::string &s)
{
    const std::string::size_type first = s.find_first_not_of('"');
    if (first == std::string::npos)
    {
        // nothing but quotes (or empty)
        s.clear();
        return;
    }

    const std::string::size_type last = s.find_last_not_of('"');
    s.erase(last + 1);
    s.erase(0, first);
}
