annotate server/python/tmod.py @ 0:a4671277546c tip

created the repository for the thymian project
author ferencd
date Tue, 17 Aug 2021 11:19:54 +0200
parents
children
rev   line source
ferencd@0 1 import json
ferencd@0 2 import os
ferencd@0 3 import shutil
ferencd@0 4 import re
ferencd@0 5 import sys
ferencd@0 6
ferencd@0 7 import unidecode
ferencd@0 8 import pprint
ferencd@0 9 import string
ferencd@0 10
# (recipe number, key) pairs in book order. Some numbers (51, 95-98) have
# no entry.
_RECIPE_NUMBER_KEY_PAIRS = (
    (1, "furry_bread"),
    (2, "bread_cake"),
    (3, "fried_mushrooms"),
    (4, "jewish_eggs"),
    (5, "mushroom_pancake"),
    (6, "mashed_aubergine"),
    (7, "boiled_bacon"),
    (8, "vegetable_spread"),
    (9, "homemade_pate"),
    (10, "transylvanian_omelette"),
    (11, "beef_salad"),
    (12, "polenta_with_cheese"),
    (13, "clear_chicken_soup"),
    (14, "clear_beef_soup"),
    (15, "goulash_soup"),
    (16, "fried_meat_soup"),
    (17, "stew_soup"),
    (18, "sauerkraut_juice_soup"),
    (19, "sauerkraut_soup"),
    (20, "cabbage_soup"),
    (21, "stuffed_peppers"),
    (22, "stuffed_leaves"),
    (23, "stuffed_squash"),
    (24, "stuffed_kohlrabi"),
    (25, "stuffed_onion"),
    (26, "bean_soup_with_tomato"),
    (27, "bean_soup_with_tarragon"),
    (28, "horseradish_soup"),
    (29, "potato_soup_with_tarragon"),
    (30, "lamb_soup_with_tarragon"),
    (31, "lettuce_soup_with_meat"),
    (32, "bean___lettuce_soup"),
    (33, "lettuce_soup_with_eggs"),
    (34, "spinach_soup"),
    (35, "cauliflower_soup"),
    (36, "garlic_soup"),
    (37, "bread_soup"),
    (38, "thick_vegetable_soup"),
    (39, "the_fastest_lettuce_soup_ever"),
    (40, "french_bean_soup"),
    (41, "egg_soup_from_transylvania"),
    (42, "amazingly_simple_potato_soup"),
    (43, "cabbage_tomato_soup"),
    (44, "sour_mushroom_soup"),
    (45, "mushroom_soup"),
    (46, "tomato_soup"),
    (47, "tiny_flour_dumpling_soup"),
    (48, "apple_soup"),
    (49, "sour_cherry_soup"),
    (50, "dried_prune_soup"),
    (52, "meatball_soup"),
    (53, "meatball_soup_with_tarragon"),
    (54, "chicken_soup_with_vegetables"),
    (55, "ratatouille_from_transylvania"),
    (56, "gipsy_ratatouille"),
    (57, "potato_goulash"),
    (58, "mixed_stew"),
    (59, "risotto_ala_transylvania"),
    (60, "schnitzel"),
    (61, "butter_fried_chicken"),
    (62, "meat_roasted_with_onions"),
    (63, "roasted_meat_ala_brasov"),
    (64, "meatballs"),
    (65, "smalls"),
    (66, "lamb_cake"),
    (67, "grill_ala_transylvania"),
    (68, "chicken_paprikas"),
    (69, "pepper_tokany"),
    (70, "mixed_stew"),
    (71, "mushroom_paprikas"),
    (72, "roast_pork_a_la_transylvania"),
    (73, "fried_chicken"),
    (74, "szekely_goulash"),
    (75, "layered_cabbage"),
    (76, "stuffed_cabbage"),
    (77, "fried_liver"),
    (78, "layered_potatoes"),
    (79, "chulent"),
    (80, "hunters_dish"),
    (81, "chicken_with_carrot"),
    (82, "fish_with_vegetables"),
    (83, "fried_potatoes_with_dill"),
    (84, "mashed_potatoes"),
    (85, "potato_with_green_parsley"),
    (86, "fried_potato_with_paprika"),
    (87, "pommes_frites"),
    (88, "green_peas"),
    (89, "celery_sides"),
    (90, "squash"),
    (91, "green_beans"),
    (92, "spinach"),
    (93, "crushed_beans"),
    (94, "cabbage"),
    (99, "donuts_from_transylvania"),
    (100, "apple_cake_with_meringue"),
    (101, "classic_apple_cake"),
    (102, "quick_apple_cake"),
    (103, "lemon_cake"),
    (104, "linzer_cookies"),
    (105, "pretzel"),
    (106, "walnut_cake"),
    (107, "cottage_cheese_dumplings"),
    (108, "plum_dumplings"),
    (109, "pancakes"),
    (110, "fried_pancake"),
    (111, "birds_milk"),
)

# One single-entry dict {recipe number: key} per recipe, in the order of
# the table above. The script at the bottom of this file looks a recipe's
# key up here by its integer id.
all_recipe_keys = [{number: key} for number, key in _RECIPE_NUMBER_KEY_PAIRS]
ferencd@0 45
# Recognised ingredient words. ingredient_extractor() emits an ingredient's
# 1-based position in this list as its ingredient_id, so the ORDER IS
# SIGNIFICANT: do not sort, de-duplicate or insert in the middle (note that
# "squashes" deliberately stays ahead of "squash").
global_all_ingredients = [
    # a
    "allspice", "almonds", "apple", "apples", "apricots", "asparagus",
    "aubergine",
    # b
    "bacon", "baking-powder", "basil", "bay-leaves", "beans", "beef",
    "boneless-chicken", "bones", "borlotti-beans", "bread", "breadcrumbs",
    "broth", "bun", "butter", "butterhead-lettuce",
    # c
    "cabbage", "california-peppers", "carrot", "carrots", "cashews",
    "cauliflower", "cayennepepper", "celery", "champignon", "chanterelle",
    "cheese", "cherries", "chicken", "chicken-breast", "chilli", "chorizo",
    "cinnamon", "cloves", "cod", "coriander", "corn", "cottage-cheese",
    "crème-fraiche", "csabai", "cucumber", "cumin",
    # d
    "dill", "dough", "duck",
    # e
    "egg", "eggs", "entrecote",
    # f
    "fat", "feta", "fettucine", "fillets", "fish", "flour", "fusilli",
    # g
    "garlic", "gem-lettuce", "ginger", "goose", "grape-leaves", "gyulai",
    # h
    "haricot", "heart", "herbs", "hungarian-sausage",
    # i
    "iceberg-lettuce", "icing-sugar",
    # j
    "jam", "juice",
    # k
    "kale", "kidney", "kidney-beans", "kielbasa", "knuckle", "kohlrabi",
    # l
    "lamb", "lard", "lean-meat", "legs", "lemon", "lemon-juice", "lettuce",
    "liver", "lovage", "lung",
    # m
    "macaroni", "marjoram", "marrow", "marrowbone", "mayonnaise", "meat",
    "meatballs", "milk", "minced-meat", "mushrooms",
    # n
    "neck", "nutmeg",
    # o
    "oil", "olives", "onion", "onions", "orange", "oregano", "oxtail",
    "oyster-mushrooms",
    # p
    "paprika", "parsley", "parsnips", "pasta", "peas", "penne", "pepper",
    "peritoneum", "pig", "poppy-seed", "pork", "pork-loin", "potato",
    "potatoes", "prunes", "puree",
    # r
    "rabbit", "radish", "radishes", "raisins", "ribs", "rice",
    "romano-lettuce", "roots", "rosemary", "roux",
    # s
    "salt", "sambal-oelek", "sauerkraut", "sauerkraut-juice", "sausage",
    "sausages", "savoury", "semolina", "sesame-seed", "shallots", "skin",
    "smoked", "smoked-pork", "soda", "spinach", "spleen", "spring-onion",
    "spring-onions", "squashes", "squash", "sugar", "sunflower-oil",
    "sunflower-seeds",
    # t
    "tagliatelle", "tarragon", "tenderloin", "thickening", "thyme", "tomato",
    "tomato-juice", "tomatoes", "turnip", "turnips",
    # v
    "vanilla", "vanilla-bean", "vanilla-sugar", "veal", "vegeta",
    "vegetable", "vegetables", "vienna", "vinegar",
    # w
    "walnut", "water", "wheat-flour", "whipped", "whipped-cream",
    "wholemeal-bread", "wine", "wings",
    # y
    "yeast", "yolks",
    # z
    "zucchini", "zucchinis",
]
ferencd@0 71
ferencd@0 72
# Tokens that begin with a digit (quantities such as "500") are never tags.
_STARTS_WITH_DIGITS_RE = re.compile(r"[0-9]+")


def ingredient_extractor(recipes=None, known_ingredients=None):
    """Print SQL insert statements for foods, translations and ingredient tags.

    Three sections are written to stdout, in this order:

    1. ``-- food list``: one ``insert into food`` per recipe,
    2. ``-- translations for food``: one ``insert into translations`` per
       recipe,
    3. ``-- tags of ingredients to food``: one ``insert into tags`` per
       distinct (recipe, ingredient word) pair, ordered by recipe id.

    Args:
        recipes: mapping whose values are recipe dicts providing the keys
            "id", "key", "title" and "ingredients" (a mapping of group
            heading to a list of ingredient lines). Defaults to the
            module-level ``all_recipes`` that process_file() fills in.
        known_ingredients: ordered sequence of recognised ingredient words;
            a word's 1-based position is emitted as its ingredient_id.
            Defaults to the module-level ``global_all_ingredients``.

    Raises:
        NameError: if ``recipes`` is omitted and process_file() has not
            been called yet (the ``all_recipes`` global does not exist).
    """
    if recipes is None:
        recipes = all_recipes
    if known_ingredients is None:
        known_ingredients = global_all_ingredients

    # 1-based position of the first occurrence of every known ingredient,
    # i.e. what ``list.index(word) + 1`` would return, without the scan.
    ingredient_id = {}
    for position, name in enumerate(known_ingredients, start=1):
        ingredient_id.setdefault(name, position)

    # NOTE(review): every punctuation character, including "-", is removed
    # before the text is split into words, so hyphenated entries of the
    # ingredient list (e.g. "baking-powder") can never match a token here.
    # Confirm whether that is intended.
    drop_punctuation = str.maketrans("", "", string.punctuation)

    # Collect (food_id, word, food_name) once per distinct (food_id, word),
    # keeping the order of first appearance.
    tags = []
    already_tagged = set()
    for recipe in recipes.values():
        for ingredient_lines in recipe["ingredients"].values():
            for ingredient_line in ingredient_lines:
                cleaned = ingredient_line.lower().translate(drop_punctuation)
                for word in cleaned.split():
                    if len(word) <= 1 or word not in ingredient_id:
                        continue
                    if _STARTS_WITH_DIGITS_RE.match(word):
                        continue
                    pair = (recipe["id"], word)
                    if pair in already_tagged:
                        continue
                    already_tagged.add(pair)
                    tags.append((recipe["id"], word, recipe["title"]))

    print("-- food list")
    food_type = 100
    for recipe in recipes.values():
        print('insert into food(idx, name_source, type, image, food_key) values ({}, "{}", {}, "{}", "{}");'.format(
            int(recipe["id"]), recipe["key"] + "_name", food_type, "{#rroot}/img/icon.png", recipe["key"]))

    print("-- translations for food")
    for recipe in recipes.values():
        print('insert into translations(source, gb) values ("{}", "{}");'.format(
            recipe["key"] + "_name", recipe["title"]))

    print("-- tags of ingredients to food")
    # Stable sort on the id exactly as stored. Ids produced by
    # process_file() are strings, so "10" sorts ahead of "2".
    tags.sort(key=lambda tag: tag[0])
    for food_id, word, food_name in tags:
        print("insert into tags(food_id, ingredient_id) values ({}, {}); -- {} / {}".format(
            food_id, ingredient_id[word], word, food_name))
ferencd@0 111
ferencd@0 112
def make_key(recipename):
    """Turn a recipe title into its key.

    The title is lower-cased and passed through ``unidecode.unidecode``;
    in the result every space and hyphen becomes an underscore and every
    double or single quote is removed.
    """
    substitutions = str.maketrans({" ": "_", "-": "_", "\"": None, "'": None})
    transliterated = unidecode.unidecode(recipename.lower())
    return transliterated.translate(substitutions)
ferencd@0 121
ferencd@0 122
# A recipe starts on a line like "12." (optionally ".12.").
_RECIPE_START_RE = re.compile(r"\.?[0-9]+\.")
# Inside a recipe body only the plain "12." form ends the recipe.
_NEXT_RECIPE_RE = re.compile(r"[0-9]+\.")
# Body lines that begin with a digit are skipped (the original comment
# calls them line numbers).
_NUMBERED_LINE_RE = re.compile(r"[0-9]+")


def _bullet_text(raw_line):
    """Return the text of an "o "-bulleted ingredient line without the bullet."""
    return raw_line.strip().replace("o ", "", 1).strip()


def _skip_parenthesised_block(handle):
    """Consume lines through a lone ")" and the line after it; False on EOF."""
    while True:
        raw = handle.readline()
        if not raw:
            return False
        if raw.strip() == ")":
            handle.readline()  # the separator line that follows ")"
            return True


def _read_intro(handle):
    """Read up to the next empty line; return the text, or None on EOF.

    Every line is stripped and followed by a single space, so the result
    normally carries one trailing space.
    """
    pieces = []
    while True:
        raw = handle.readline()
        if not raw:
            return None
        if raw == "\n":
            return "".join(piece + " " for piece in pieces)
        pieces.append(raw.strip())


def _read_ingredients(handle):
    """Read the ingredient section; return ``(groups, last_heading)``.

    ``groups`` maps each heading line to the list of "o " bullets that
    belong to it. Bullets seen before any heading are attributed to the
    first heading that follows (or to "" when there is none).
    """
    groups = {}
    heading = ""
    bullets = []
    while True:
        raw = handle.readline()
        if not raw:
            # The file ended inside the section: keep what was collected
            # instead of silently dropping the last group.
            if heading or bullets:
                groups[heading] = bullets
            return groups, heading
        if raw == "\n":
            groups[heading] = bullets
            return groups, heading
        if raw.startswith("o "):
            bullets.append(_bullet_text(raw))
        elif not heading:
            heading = raw.strip()
        else:
            groups[heading] = bullets
            bullets = []
            heading = raw.strip()


def _read_body(handle, groups, heading):
    """Read the instructions up to the next recipe; return ``(steps, tip_lines)``.

    Late "o " bullets are appended to ``groups[heading]``. From a line that
    reads exactly "Tips" onwards, lines go to ``tip_lines`` instead of
    ``steps``. The line that starts the next recipe is left unread.
    """
    steps = []
    tip_lines = []
    sink = steps
    while True:
        mark = handle.tell()
        raw = handle.readline()
        if not raw:
            break
        text = raw.strip()
        if _NEXT_RECIPE_RE.match(text):
            # Rewind so the caller sees the next recipe's number line.
            handle.seek(mark)
            break
        if not text or _NUMBERED_LINE_RE.match(text):
            continue
        if raw.startswith("o "):
            groups.setdefault(heading, []).append(_bullet_text(raw))
            continue
        if text == "Tips":
            sink = tip_lines
        sink.append(text)
    return steps, tip_lines


def _collect_tips(tip_lines):
    """Join the lines between "•" / "-" separator lines into one string per tip."""
    tips = []
    words = []
    for text in tip_lines:
        if text in ("•", "-"):
            if words:
                tips.append(" ".join(words))
                words = []
        elif text != "Tips":
            words.append(text)
    if words:
        tips.append(" ".join(words))
    return tips


def process_file(fn):
    """Parse the recipe text file ``fn`` into a dict of recipes.

    Expected layout of one recipe, as read by this parser:

    * a line holding the recipe number followed by a dot ("12."),
    * one skipped line, then the title line (a trailing "(" is removed),
    * lines that are skipped up to a line consisting of ")" and the line
      after it,
    * the intro, ended by an empty line,
    * the ingredients: heading lines followed by "o "-prefixed bullets,
      ended by an empty line,
    * the instructions, optionally followed by a "Tips" line and tips
      separated by "•" or "-" lines, up to the next recipe number.

    The result is also stored in the module-level ``all_recipes``, which
    ingredient_extractor() reads by default.

    Args:
        fn: path of the text file to read.

    Returns:
        dict mapping the recipe id (the number line with its dots removed,
        as a string) to a dict with the keys "id", "key", "title", "intro",
        "descr" (the intro up to its first "."), "fullrecipe" (instruction
        lines joined by single spaces) and "ingredients" (heading -> list of
        bullet texts). When tips were found, "tips" maps the 1-based
        position of each tip to its text. A recipe cut off by the end of
        the file before its intro is complete is not returned.
    """
    global all_recipes
    with open(fn, 'r') as handle:
        # contains a recipe mapped to a number, should be the same in all languages
        all_recipes = {}
        while True:
            raw = handle.readline()
            if not raw:
                break

            number_line = raw.strip()
            if not _RECIPE_START_RE.match(number_line):
                continue

            handle.readline()  # the line after the number is not used
            title = handle.readline().strip()
            if title.endswith("("):
                title = title.replace("(", "").strip()

            if not _skip_parenthesised_block(handle):
                break
            intro = _read_intro(handle)
            if intro is None:
                break

            groups, heading = _read_ingredients(handle)
            steps, tip_lines = _read_body(handle, groups, heading)

            recipe_id = number_line.replace(".", "")
            current_recipe = {
                "id": recipe_id,
                "key": make_key(title),
                "title": title,
                "intro": intro.strip(),
                "descr": intro.split(".")[0],
                "fullrecipe": " ".join(steps),
                "ingredients": groups,
            }
            tips = _collect_tips(tip_lines)
            if tips:
                # A mapping, because the markdown export at the bottom of
                # this file iterates r["tips"].values().
                current_recipe["tips"] = dict(enumerate(tips, start=1))

            all_recipes[recipe_id] = current_recipe

    return all_recipes
ferencd@0 275
ferencd@0 276
ferencd@0 277 # ingredient_extractor()
ferencd@0 278
files = ["book2-lin-compressed.txt"]

OUTPUT_ROOT = "temp_w"

# The markdown export used to sit behind an unconditional `continue` and
# therefore never ran. The switch keeps that behaviour as the default while
# making the choice explicit; set it to True to also write the files.
WRITE_RECIPE_FILES = False


def _write_recipe_files(r, key):
    """Write descr.md, intro.md and recipe.md for recipe ``r`` under temp_w/<key>/no/."""
    target_dir = os.path.join(OUTPUT_ROOT, key, "no")
    os.makedirs(target_dir, exist_ok=True)

    # descr.md
    with open(os.path.join(target_dir, "descr.md"), "w") as descr_f:
        descr_f.write(r["descr"] + ".")

    # intro.md
    with open(os.path.join(target_dir, "intro.md"), "w") as intro_f:
        intro_f.write(r["intro"])

    # recipe.md
    with open(os.path.join(target_dir, "recipe.md"), "w") as recipe_f:
        # ingredients, one markdown section per group heading
        for ing_root, ing_lines in r["ingredients"].items():
            recipe_f.write("\n### " + ing_root + "\n")
            for ing in ing_lines:
                recipe_f.write("- " + ing + "\n")

        # the recipe itself
        recipe_f.write("\n### Slik gjør du det\n")

        # Start a new paragraph after a "." once at least two sentences
        # have gone by and more than 120 characters have been written in
        # total (the character count is never reset).
        sentences_seen = 0
        chars_written = 0
        for c in r["fullrecipe"]:
            recipe_f.write(c)
            chars_written += 1
            if c == '.':
                sentences_seen += 1
                if sentences_seen >= 2 and chars_written > 120:
                    sentences_seen = 0
                    recipe_f.write("\n\n")

        # tips, one list item per line like the ingredients above
        if "tips" in r:
            recipe_f.write("\nTips\n")
            for tip in r["tips"].values():
                recipe_f.write("- " + tip + "\n")


list_all_recipes = [process_file(file) for file in files]

# Always start from an empty output directory.
if os.path.exists(OUTPUT_ROOT):
    shutil.rmtree(OUTPUT_ROOT)
os.mkdir(OUTPUT_ROOT)

# Recipe number -> key. The first entry wins, as it did with the linear
# search over all_recipe_keys.
key_by_number = {}
for entry in all_recipe_keys:
    for number, recipe_key in entry.items():
        key_by_number.setdefault(number, recipe_key)

for rl in list_all_recipes:
    for r in rl.values():
        key = key_by_number.get(int(r["id"]), "")
        if not key:
            pprint.pprint(r)
            print("No key found for this above")
            sys.exit(3)

        # NOTE(review): the title is interpolated unescaped; a title that
        # contains ' would produce an invalid statement. Confirm whether
        # escaping is needed for the target database.
        print("update translations set no='{}' where source='{}_name';".format(r["title"], key))

        if WRITE_RECIPE_FILES:
            _write_recipe_files(r, key)
ferencd@0 357