Mercurial > thymian
comparison server/python/tmod.py @ 0:a4671277546c tip
created the repository for the thymian project
| author | ferencd |
|---|---|
| date | Tue, 17 Aug 2021 11:19:54 +0200 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:a4671277546c |
|---|---|
| 1 import json | |
| 2 import os | |
| 3 import shutil | |
| 4 import re | |
| 5 import sys | |
| 6 | |
| 7 import unidecode | |
| 8 import pprint | |
| 9 import string | |
| 10 | |
# Recipe number -> stable recipe key.
# Stored as a list of single-entry dicts ({number: key}).  The script section
# at the end of this file looks a recipe up by int(recipe["id"]) and uses the
# key in the "<key>_name" translation source and the "temp_w/<key>" directory.
# NOTE(review): numbers 51 and 95-98 have no entry, and "mixed_stew" is the
# key for both 58 and 70 -- confirm whether these are intentional.
all_recipe_keys = [ {1 : "furry_bread"}, {2 : "bread_cake"}, {3 : "fried_mushrooms"}, {4 : "jewish_eggs"},
                    {5 : "mushroom_pancake"}, {6 : "mashed_aubergine"}, {7 : "boiled_bacon"}, {8 : "vegetable_spread"},
                    {9 : "homemade_pate"}, {10 : "transylvanian_omelette"}, {11 : "beef_salad"},
                    {12 : "polenta_with_cheese"}, {13 : "clear_chicken_soup"}, {14 : "clear_beef_soup"},
                    {15 : "goulash_soup"}, {16 : "fried_meat_soup"}, {17 : "stew_soup"}, {18 : "sauerkraut_juice_soup"},
                    {19 : "sauerkraut_soup"}, {20 : "cabbage_soup"}, {21 : "stuffed_peppers"}, {22 : "stuffed_leaves"},
                    {23 : "stuffed_squash"}, {24 : "stuffed_kohlrabi"}, {25 : "stuffed_onion"},
                    {26 : "bean_soup_with_tomato"}, {27 : "bean_soup_with_tarragon"}, {28 : "horseradish_soup"},
                    {29 : "potato_soup_with_tarragon"}, {30 : "lamb_soup_with_tarragon"},
                    {31 : "lettuce_soup_with_meat"}, {32 : "bean___lettuce_soup"}, {33 : "lettuce_soup_with_eggs"},
                    {34 : "spinach_soup"}, {35 : "cauliflower_soup"}, {36 : "garlic_soup"}, {37 : "bread_soup"},
                    {38 : "thick_vegetable_soup"}, {39 : "the_fastest_lettuce_soup_ever"}, {40 : "french_bean_soup"},
                    {41 : "egg_soup_from_transylvania"}, {42 : "amazingly_simple_potato_soup"},
                    {43 : "cabbage_tomato_soup"}, {44 : "sour_mushroom_soup"}, {45 : "mushroom_soup"},
                    {46 : "tomato_soup"}, {47 : "tiny_flour_dumpling_soup"}, {48 : "apple_soup"},
                    {49 : "sour_cherry_soup"}, {50 : "dried_prune_soup"}, {52 : "meatball_soup"},
                    {53 : "meatball_soup_with_tarragon"}, {54 : "chicken_soup_with_vegetables"},
                    {55 : "ratatouille_from_transylvania"}, {56 : "gipsy_ratatouille"}, {57 : "potato_goulash"},
                    {58 : "mixed_stew"}, {59 : "risotto_ala_transylvania"}, {60 : "schnitzel"},
                    {61 : "butter_fried_chicken"}, {62 : "meat_roasted_with_onions"}, {63 : "roasted_meat_ala_brasov"},
                    {64 : "meatballs"}, {65 : "smalls"}, {66 : "lamb_cake"}, {67 : "grill_ala_transylvania"},
                    {68 : "chicken_paprikas"}, {69 : "pepper_tokany"}, {70 : "mixed_stew"}, {71 : "mushroom_paprikas"},
                    {72 : "roast_pork_a_la_transylvania"}, {73 : "fried_chicken"}, {74 : "szekely_goulash"},
                    {75 : "layered_cabbage"}, {76 : "stuffed_cabbage"}, {77 : "fried_liver"}, {78 : "layered_potatoes"},
                    {79 : "chulent"}, {80 : "hunters_dish"}, {81 : "chicken_with_carrot"},
                    {82 : "fish_with_vegetables"}, {83 : "fried_potatoes_with_dill"}, {84 : "mashed_potatoes"},
                    {85 : "potato_with_green_parsley"}, {86 : "fried_potato_with_paprika"}, {87 : "pommes_frites"},
                    {88 : "green_peas"}, {89 : "celery_sides"}, {90 : "squash"}, {91 : "green_beans"}, {92 : "spinach"},
                    {93 : "crushed_beans"}, {94 : "cabbage"}, {99 : "donuts_from_transylvania"},
                    {100 : "apple_cake_with_meringue"}, {101 : "classic_apple_cake"}, {102 : "quick_apple_cake"},
                    {103 : "lemon_cake"}, {104 : "linzer_cookies"}, {105 : "pretzel"}, {106 : "walnut_cake"},
                    {107 : "cottage_cheese_dumplings"}, {108 : "plum_dumplings"}, {109 : "pancakes"},
                    {110 : "fried_pancake"}, {111 : "birds_milk"}
                    ]
| 45 | |
# Vocabulary of ingredient words recognised by ingredient_extractor().
# ORDER IS SIGNIFICANT: the 1-based position of a word in this list is what
# ingredient_extractor() prints as ingredient_id in its "insert into tags"
# statements, so do not sort this list or insert entries in the middle.
# NOTE(review): presumably the positions mirror an ingredient table that is
# populated elsewhere -- verify before appending or changing entries.
global_all_ingredients = ['allspice', 'almonds', 'apple', 'apples', 'apricots', 'asparagus', 'aubergine', 'bacon',
                          'baking-powder', 'basil', 'bay-leaves', 'beans', 'beef', 'boneless-chicken', 'bones',
                          'borlotti-beans', 'bread', 'breadcrumbs', 'broth', 'bun', 'butter', 'butterhead-lettuce',
                          'cabbage', 'california-peppers', 'carrot', 'carrots', 'cashews', 'cauliflower',
                          'cayennepepper', 'celery', 'champignon', 'chanterelle', 'cheese', 'cherries', 'chicken',
                          'chicken-breast', 'chilli', 'chorizo', 'cinnamon', 'cloves', 'cod', 'coriander', 'corn',
                          'cottage-cheese', 'crème-fraiche', 'csabai', 'cucumber', 'cumin', 'dill', 'dough', 'duck',
                          'egg', 'eggs', 'entrecote', 'fat', 'feta', 'fettucine', 'fillets', 'fish', 'flour', 'fusilli',
                          'garlic', 'gem-lettuce', 'ginger', 'goose', 'grape-leaves', 'gyulai', 'haricot', 'heart',
                          'herbs', 'hungarian-sausage', 'iceberg-lettuce', 'icing-sugar', 'jam', 'juice', 'kale',
                          'kidney', 'kidney-beans', 'kielbasa', 'knuckle', 'kohlrabi', 'lamb', 'lard', 'lean-meat',
                          'legs', 'lemon', 'lemon-juice', 'lettuce', 'liver', 'lovage', 'lung', 'macaroni', 'marjoram',
                          'marrow', 'marrowbone', 'mayonnaise', 'meat', 'meatballs', 'milk', 'minced-meat', 'mushrooms',
                          'neck', 'nutmeg', 'oil', 'olives', 'onion', 'onions', 'orange', 'oregano', 'oxtail',
                          'oyster-mushrooms', 'paprika', 'parsley', 'parsnips', 'pasta', 'peas', 'penne', 'pepper',
                          'peritoneum', 'pig', 'poppy-seed', 'pork', 'pork-loin', 'potato', 'potatoes', 'prunes',
                          'puree', 'rabbit', 'radish', 'radishes', 'raisins', 'ribs', 'rice', 'romano-lettuce', 'roots',
                          'rosemary', 'roux', 'salt', 'sambal-oelek', 'sauerkraut', 'sauerkraut-juice', 'sausage',
                          'sausages', 'savoury', 'semolina', 'sesame-seed', 'shallots', 'skin', 'smoked', 'smoked-pork',
                          'soda', 'spinach', 'spleen', 'spring-onion', 'spring-onions', 'squashes', 'squash', 'sugar',
                          'sunflower-oil', 'sunflower-seeds', 'tagliatelle', 'tarragon', 'tenderloin', 'thickening',
                          'thyme', 'tomato', 'tomato-juice', 'tomatoes', 'turnip', 'turnips', 'vanilla', 'vanilla-bean',
                          'vanilla-sugar', 'veal', 'vegeta', 'vegetable', 'vegetables', 'vienna', 'vinegar', 'walnut',
                          'water', 'wheat-flour', 'whipped', 'whipped-cream', 'wholemeal-bread', 'wine', 'wings',
                          'yeast', 'yolks', 'zucchini', 'zucchinis']
| 71 | |
| 72 | |
def ingredient_extractor(recipes=None, known_ingredients=None):
    """Print SQL insert statements for foods, their names and their ingredient tags.

    Three sections are written to standard output, each introduced by a
    ``--`` SQL comment line:

    1. one ``insert into food`` row per recipe (the ``type`` column is always 100),
    2. one ``insert into translations`` row per recipe title,
    3. one ``insert into tags`` row per distinct (recipe, ingredient word) pair,
       ordered by numeric recipe id.

    Args:
        recipes: mapping of recipe id to recipe dict; every recipe dict must
            provide ``id``, ``key``, ``title`` and ``ingredients`` (a mapping
            of group heading to a list of ingredient lines).  Defaults to the
            module-level ``all_recipes`` filled in by ``process_file``.
        known_ingredients: ordered list of recognised ingredient words.  The
            1-based position of a word is printed as its ``ingredient_id``.
            Defaults to the module-level ``global_all_ingredients``.

    Raises:
        ValueError: if a recipe id cannot be converted with ``int``.
    """
    if recipes is None:
        recipes = all_recipes
    if known_ingredients is None:
        known_ingredients = global_all_ingredients

    # 1-based position of every known word; the first occurrence wins, which
    # is what list.index() would have reported.
    ingredient_ids = {}
    for position, name in enumerate(known_ingredients, start=1):
        ingredient_ids.setdefault(name, position)

    # NOTE(review): '-' is part of string.punctuation, so hyphens are removed
    # before matching and hyphenated vocabulary entries (e.g. "baking-powder")
    # can never match a word here -- confirm whether that is intended.
    strip_punctuation = str.maketrans("", "", string.punctuation)
    starts_with_digit = re.compile(r"[0-9]")

    # Collect (recipe id, ingredient word, recipe title) for every recognised
    # word, in the order the words appear.
    found = []
    for recipe in recipes.values():
        for lines in recipe["ingredients"].values():
            for line in lines:
                for word in line.lower().translate(strip_punctuation).split():
                    if starts_with_digit.match(word) or len(word) <= 1:
                        continue
                    if word in ingredient_ids:
                        found.append((recipe["id"], word, recipe["title"]))

    print("-- food list")
    food_type = 100
    for recipe in recipes.values():
        print('insert into food(idx, name_source, type, image, food_key) values ({}, "{}", {}, "{}", "{}");'.format(
            int(recipe["id"]), recipe["key"] + "_name", food_type, "{#rroot}/img/icon.png", recipe["key"]))

    print("-- translations for food")
    for recipe in recipes.values():
        print('insert into translations(source, gb) values ("{}", "{}");'.format(
            recipe["key"] + "_name", recipe["title"]))

    print("-- tags of ingredients to food")
    # Recipe ids are strings; sort them as numbers so that "2" precedes "10".
    # sorted() is stable, so words keep their order of appearance per recipe.
    emitted = set()
    for food_id, word, title in sorted(found, key=lambda entry: int(entry[0])):
        if (food_id, word) in emitted:
            continue
        emitted.add((food_id, word))
        print("insert into tags(food_id, ingredient_id) values ({}, {}); -- {} / {}".format(
            food_id, ingredient_ids[word], word, title))
| 111 | |
| 112 | |
def make_key(recipename):
    """Turn a recipe title into a lowercase, transliterated identifier.

    The title is lowercased and passed through ``unidecode``; afterwards
    spaces and hyphens become underscores and single and double quote
    characters are dropped.
    """
    # One translation pass: ' ' and '-' map to '_', both quote kinds vanish.
    separators_and_quotes = str.maketrans(" -", "__", "\"'")
    transliterated = unidecode.unidecode(recipename.lower())
    return transliterated.translate(separators_and_quotes)
| 121 | |
| 122 | |
# A recipe starts on a line such as "12." (optionally with a leading dot).
_RECIPE_HEADER_RE = re.compile(r"\.?[0-9]+\.")
# Lines in the recipe body that begin with a digit are dropped.
_LEADING_DIGITS_RE = re.compile(r"[0-9]+")


def _strip_bullet(line):
    """Return an ingredient line without its leading ``o `` bullet marker."""
    return line.strip().replace("o ", "", 1).strip()


def _skip_title_block(fh):
    """Consume lines up to a lone ``)`` line plus the line after it.

    Returns False when the file ends before a ``)`` line is found.
    """
    while True:
        line = fh.readline()
        if not line:
            return False
        if line.strip() == ")":
            # the line following ")" is expected to be empty
            fh.readline()
            return True


def _read_intro(fh):
    """Return the intro lines up to the next blank line, each followed by a space.

    Returns None when the file ends before a blank line is found.
    """
    pieces = []
    while True:
        line = fh.readline()
        if not line:
            return None
        if line == "\n":
            return "".join(piece + " " for piece in pieces)
        pieces.append(line.strip())


def _read_ingredients(fh):
    """Read ingredient groups up to the next blank line.

    Lines starting with ``o `` are items; any other line starts a new group
    and is used as its heading.  Returns ``(groups, last_heading)``.  If the
    file ends before the blank line, the group still being read is not stored.
    """
    groups = {}
    heading = ""
    items = []
    while True:
        line = fh.readline()
        if not line:
            break
        if line == "\n":
            groups[heading] = items
            break
        if line.startswith("o "):
            items.append(_strip_bullet(line))
        elif not heading:
            # first heading: items read so far stay attached to it
            heading = line.strip()
        else:
            groups[heading] = items
            items = []
            heading = line.strip()
    return groups, heading


def _read_body(fh, groups, heading):
    """Read the preparation text and tips up to the next recipe header or EOF.

    Late ``o `` ingredient lines are appended to ``groups[heading]``.
    Returns ``(recipe_lines, tip_lines, header_line)`` where ``header_line``
    is the raw header line of the next recipe, or "" at end of file.
    """
    recipe_lines = []
    tip_lines = []
    target = recipe_lines
    while True:
        line = fh.readline()
        if not line:
            return recipe_lines, tip_lines, ""
        text = line.strip()
        # did we cross into the next recipe?
        if _RECIPE_HEADER_RE.match(text):
            return recipe_lines, tip_lines, line
        # NOTE(review): this drops every line that merely starts with a
        # digit, not only lines that are a bare number -- confirm.
        if _LEADING_DIGITS_RE.match(text):
            continue
        if not text:
            continue
        if line.startswith("o "):
            groups[heading].append(_strip_bullet(line))
            continue
        if text == "Tips":
            # everything from the "Tips" marker on belongs to the tips
            target = tip_lines
        target.append(text)


def _collect_tips(tip_lines):
    """Merge raw tip lines into one string per tip.

    A line consisting only of a bullet or a dash closes the current tip; the
    "Tips" marker line itself and blank lines are ignored.
    """
    tips = []
    current = []
    for text in tip_lines:
        if not text.strip():
            continue
        if text in ("•", "-"):
            if current:
                tips.append(" ".join(current))
            current = []
        elif text != "Tips":
            current.append(text)
    if current:
        tips.append(" ".join(current))
    return tips


def process_file(fn):
    """Parse the recipe text file ``fn`` into a dict of recipes keyed by recipe id.

    Layout read for each recipe: a header line holding the recipe number, one
    skipped line, the title line, lines up to a lone ``)`` plus one skipped
    line, the intro up to a blank line, the ingredient groups up to a blank
    line, and finally the preparation text (optionally followed by a "Tips"
    section) up to the next header line.

    Each recipe dict holds ``id`` (the header line without dots), ``key``
    (from ``make_key``), ``title``, ``intro``, ``descr`` (the intro up to its
    first full stop), ``fullrecipe``, ``ingredients`` and, when the recipe
    has tips, ``tips`` (a list of strings).

    The result is also stored in the module-level ``all_recipes``.  Parsing
    stops at the first recipe whose title block or intro is cut off by the
    end of the file; that recipe is not stored.

    Args:
        fn: path of the text file to read.

    Returns:
        The dict of parsed recipes.
    """
    global all_recipes
    # contains a recipe mapped to a number, should be the same in all languages
    all_recipes = {}

    with open(fn, 'r') as fh:
        pending = ""  # header line already consumed while reading a recipe body
        while True:
            line = pending or fh.readline()
            pending = ""
            if not line:
                break

            header = line.strip()
            if not _RECIPE_HEADER_RE.match(header):
                continue

            # the line after the header is expected to be empty
            fh.readline()
            title = fh.readline().strip()
            if title.endswith("("):
                title = title.replace("(", "").strip()

            current_recipe = {
                "id": header.replace(".", ""),
                "key": make_key(title),
                "title": title,
            }

            if not _skip_title_block(fh):
                break

            intro = _read_intro(fh)
            if intro is None:
                break
            current_recipe["intro"] = intro.strip()
            current_recipe["descr"] = intro.split(".")[0]

            ingredients, last_heading = _read_ingredients(fh)
            recipe_lines, tip_lines, pending = _read_body(fh, ingredients, last_heading)
            tips = _collect_tips(tip_lines)

            # the preparation text becomes a single space-separated line
            current_recipe["fullrecipe"] = " ".join(recipe_lines)
            current_recipe["ingredients"] = ingredients
            if tips:
                current_recipe["tips"] = tips

            all_recipes[current_recipe["id"]] = current_recipe

    return all_recipes
| 275 | |
| 276 | |
| 277 # ingredient_extractor() | |
| 278 | |
files = ["book2-lin-compressed.txt"]

# When True, descr.md / intro.md / recipe.md are also written below
# temp_w/<key>/no for every recipe.  The step used to be skipped by an
# unconditional `continue`, so it stays switched off by default.
WRITE_MARKDOWN_FILES = False


def _sql_quote(text):
    """Double single quotes so ``text`` is safe inside a single-quoted SQL literal."""
    return text.replace("'", "''")


def _write_recipe_markdown(recipe, key, root="temp_w", lang="no"):
    """Write descr.md, intro.md and recipe.md for ``recipe`` into ``root/key/lang``."""
    target_dir = os.path.join(root, key, lang)
    os.makedirs(target_dir, exist_ok=True)

    # descr.md
    with open(os.path.join(target_dir, "descr.md"), "w") as descr_f:
        descr_f.write(recipe["descr"] + ".")

    # intro.md
    with open(os.path.join(target_dir, "intro.md"), "w") as intro_f:
        intro_f.write(recipe["intro"])

    # recipe.md
    with open(os.path.join(target_dir, "recipe.md"), "w") as recipe_f:
        # ingredients, one section per group heading
        for heading, items in recipe["ingredients"].items():
            recipe_f.write("\n### " + heading + "\n")
            for item in items:
                recipe_f.write("- " + item + "\n")

        # the recipe itself
        recipe_f.write("\n### Slik gjør du det\n")

        # Start a new paragraph after every second full stop, but only once
        # more than 120 characters have been written.
        # NOTE(review): the character count is never reset, so the 120
        # character minimum only affects the first paragraph -- confirm.
        sentences = 0
        chars_written = 0
        for c in recipe["fullrecipe"]:
            recipe_f.write(c)
            chars_written += 1
            if c == '.':
                sentences += 1
                if sentences >= 2 and chars_written > 120:
                    sentences = 0
                    recipe_f.write("\n\n")

        # tips: accept a list of strings or a dict whose values are the tips
        tips = recipe.get("tips")
        if tips:
            if isinstance(tips, dict):
                tips = tips.values()
            recipe_f.write("\nTips\n")
            for tip in tips:
                recipe_f.write("- " + tip + "\n")


list_all_recipes = [process_file(path) for path in files]

# always start from an empty output directory
if os.path.exists("temp_w"):
    shutil.rmtree("temp_w")
os.mkdir("temp_w")

# Flatten the list of single-entry dicts once; the first entry for a number
# wins, as it did with the linear scan.
recipe_key_by_number = {}
for entry in all_recipe_keys:
    for number, recipe_key in entry.items():
        recipe_key_by_number.setdefault(number, recipe_key)

for rl in list_all_recipes:
    for r in rl.values():
        key = recipe_key_by_number.get(int(r["id"]), "")
        if not key:
            pprint.pprint(r)
            print("No key found for this above")
            sys.exit(3)

        print("update translations set no='{}' where source='{}_name';".format(_sql_quote(r["title"]), key))

        if WRITE_MARKDOWN_FILES:
            _write_recipe_markdown(r, key)
| 357 |
