"""Turn a plain-text recipe book into SQL statements (and, optionally, markdown).

Running the script parses every file listed in ``files``, recreates the
``temp_w`` directory and prints one ``update translations`` statement per
recipe.  Writing the per-recipe markdown files is switched off by default,
see ``WRITE_RECIPE_FILES``.
"""
import json
import os
import pprint
import re
import shutil
import string
import sys

import unidecode

# Maps the recipe number used in the book to the food key used as the
# translation source / directory name.  One single-entry dict per recipe.
all_recipe_keys = [
    {1: "furry_bread"}, {2: "bread_cake"}, {3: "fried_mushrooms"}, {4: "jewish_eggs"},
    {5: "mushroom_pancake"}, {6: "mashed_aubergine"}, {7: "boiled_bacon"}, {8: "vegetable_spread"},
    {9: "homemade_pate"}, {10: "transylvanian_omelette"}, {11: "beef_salad"},
    {12: "polenta_with_cheese"}, {13: "clear_chicken_soup"}, {14: "clear_beef_soup"},
    {15: "goulash_soup"}, {16: "fried_meat_soup"}, {17: "stew_soup"}, {18: "sauerkraut_juice_soup"},
    {19: "sauerkraut_soup"}, {20: "cabbage_soup"}, {21: "stuffed_peppers"}, {22: "stuffed_leaves"},
    {23: "stuffed_squash"}, {24: "stuffed_kohlrabi"}, {25: "stuffed_onion"},
    {26: "bean_soup_with_tomato"}, {27: "bean_soup_with_tarragon"}, {28: "horseradish_soup"},
    {29: "potato_soup_with_tarragon"}, {30: "lamb_soup_with_tarragon"},
    {31: "lettuce_soup_with_meat"}, {32: "bean___lettuce_soup"}, {33: "lettuce_soup_with_eggs"},
    {34: "spinach_soup"}, {35: "cauliflower_soup"}, {36: "garlic_soup"}, {37: "bread_soup"},
    {38: "thick_vegetable_soup"}, {39: "the_fastest_lettuce_soup_ever"}, {40: "french_bean_soup"},
    {41: "egg_soup_from_transylvania"}, {42: "amazingly_simple_potato_soup"},
    {43: "cabbage_tomato_soup"}, {44: "sour_mushroom_soup"}, {45: "mushroom_soup"},
    {46: "tomato_soup"}, {47: "tiny_flour_dumpling_soup"}, {48: "apple_soup"},
    {49: "sour_cherry_soup"}, {50: "dried_prune_soup"}, {52: "meatball_soup"},
    {53: "meatball_soup_with_tarragon"}, {54: "chicken_soup_with_vegetables"},
    {55: "ratatouille_from_transylvania"}, {56: "gipsy_ratatouille"}, {57: "potato_goulash"},
    {58: "mixed_stew"}, {59: "risotto_ala_transylvania"}, {60: "schnitzel"},
    {61: "butter_fried_chicken"}, {62: "meat_roasted_with_onions"}, {63: "roasted_meat_ala_brasov"},
    {64: "meatballs"}, {65: "smalls"}, {66: "lamb_cake"}, {67: "grill_ala_transylvania"},
    {68: "chicken_paprikas"}, {69: "pepper_tokany"}, {70: "mixed_stew"}, {71: "mushroom_paprikas"},
    {72: "roast_pork_a_la_transylvania"}, {73: "fried_chicken"}, {74: "szekely_goulash"},
    {75: "layered_cabbage"}, {76: "stuffed_cabbage"}, {77: "fried_liver"}, {78: "layered_potatoes"},
    {79: "chulent"}, {80: "hunters_dish"}, {81: "chicken_with_carrot"},
    {82: "fish_with_vegetables"}, {83: "fried_potatoes_with_dill"}, {84: "mashed_potatoes"},
    {85: "potato_with_green_parsley"}, {86: "fried_potato_with_paprika"}, {87: "pommes_frites"},
    {88: "green_peas"}, {89: "celery_sides"}, {90: "squash"}, {91: "green_beans"}, {92: "spinach"},
    {93: "crushed_beans"}, {94: "cabbage"}, {99: "donuts_from_transylvania"},
    {100: "apple_cake_with_meringue"}, {101: "classic_apple_cake"}, {102: "quick_apple_cake"},
    {103: "lemon_cake"}, {104: "linzer_cookies"}, {105: "pretzel"}, {106: "walnut_cake"},
    {107: "cottage_cheese_dumplings"}, {108: "plum_dumplings"}, {109: "pancakes"},
    {110: "fried_pancake"}, {111: "birds_milk"},
]

# Known ingredient words.  The ORDER IS SIGNIFICANT: ingredient_extractor()
# uses "position in this list + 1" as the ingredient id in the tags table.
global_all_ingredients = [
    'allspice', 'almonds', 'apple', 'apples', 'apricots', 'asparagus', 'aubergine', 'bacon',
    'baking-powder', 'basil', 'bay-leaves', 'beans', 'beef', 'boneless-chicken', 'bones',
    'borlotti-beans', 'bread', 'breadcrumbs', 'broth', 'bun', 'butter', 'butterhead-lettuce',
    'cabbage', 'california-peppers', 'carrot', 'carrots', 'cashews', 'cauliflower',
    'cayennepepper', 'celery', 'champignon', 'chanterelle', 'cheese', 'cherries', 'chicken',
    'chicken-breast', 'chilli', 'chorizo', 'cinnamon', 'cloves', 'cod', 'coriander', 'corn',
    'cottage-cheese', 'crème-fraiche', 'csabai', 'cucumber', 'cumin', 'dill', 'dough', 'duck',
    'egg', 'eggs', 'entrecote', 'fat', 'feta', 'fettucine', 'fillets', 'fish', 'flour', 'fusilli',
    'garlic', 'gem-lettuce', 'ginger', 'goose', 'grape-leaves', 'gyulai', 'haricot', 'heart',
    'herbs', 'hungarian-sausage', 'iceberg-lettuce', 'icing-sugar', 'jam', 'juice', 'kale',
    'kidney', 'kidney-beans', 'kielbasa', 'knuckle', 'kohlrabi', 'lamb', 'lard', 'lean-meat',
    'legs', 'lemon', 'lemon-juice', 'lettuce', 'liver', 'lovage', 'lung', 'macaroni', 'marjoram',
    'marrow', 'marrowbone', 'mayonnaise', 'meat', 'meatballs', 'milk', 'minced-meat', 'mushrooms',
    'neck', 'nutmeg', 'oil', 'olives', 'onion', 'onions', 'orange', 'oregano', 'oxtail',
    'oyster-mushrooms', 'paprika', 'parsley', 'parsnips', 'pasta', 'peas', 'penne', 'pepper',
    'peritoneum', 'pig', 'poppy-seed', 'pork', 'pork-loin', 'potato', 'potatoes', 'prunes',
    'puree', 'rabbit', 'radish', 'radishes', 'raisins', 'ribs', 'rice', 'romano-lettuce', 'roots',
    'rosemary', 'roux', 'salt', 'sambal-oelek', 'sauerkraut', 'sauerkraut-juice', 'sausage',
    'sausages', 'savoury', 'semolina', 'sesame-seed', 'shallots', 'skin', 'smoked', 'smoked-pork',
    'soda', 'spinach', 'spleen', 'spring-onion', 'spring-onions', 'squashes', 'squash', 'sugar',
    'sunflower-oil', 'sunflower-seeds', 'tagliatelle', 'tarragon', 'tenderloin', 'thickening',
    'thyme', 'tomato', 'tomato-juice', 'tomatoes', 'turnip', 'turnips', 'vanilla', 'vanilla-bean',
    'vanilla-sugar', 'veal', 'vegeta', 'vegetable', 'vegetables', 'vienna', 'vinegar', 'walnut',
    'water', 'wheat-flour', 'whipped', 'whipped-cream', 'wholemeal-bread', 'wine', 'wings',
    'yeast', 'yolks', 'zucchini', 'zucchinis',
]

# A recipe starts on a line such as "12." or ".12." (leading dot tolerated).
_RECIPE_HEADER_RE = re.compile(r"\.?[0-9]+\.")
# Body lines that begin with digits are skipped (see NOTE in _read_body).
_LEADING_DIGITS_RE = re.compile(r"[0-9]+")
# Deletes every ASCII punctuation character in one pass.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)
# Replacements applied by make_key() after lower-casing and transliteration.
_KEY_TABLE = str.maketrans({" ": "_", "-": "_", "\"": None, "'": None})

# The original driver skipped the markdown export with an unconditional
# ``continue``; this flag keeps that default while leaving the export usable.
WRITE_RECIPE_FILES = False


def ingredient_extractor():
    """Print SQL inserts for the foods, their titles and their ingredient tags.

    Works on the module-level ``all_recipes`` dict, so process_file() has to
    run before this function is called.  Each ingredient line is lower-cased,
    stripped of ASCII punctuation and split on whitespace; every word found in
    ``global_all_ingredients`` becomes a tag for that recipe.  Nothing is
    returned, the statements go to standard output.
    """
    # First occurrence wins, exactly like list.index() did.
    ingredient_ids = {}
    for position, name in enumerate(global_all_ingredients, start=1):
        ingredient_ids.setdefault(name, position)

    # extract all the ingredients in a separate list
    ing_stid = 1000
    seen_keys = set()
    all_ingredients = []
    for r in all_recipes.values():
        for ingrs in r["ingredients"].values():
            for ingredient_line in ingrs:
                words = ingredient_line.lower().translate(_PUNCTUATION_TABLE).split()
                for key in words:
                    # Membership already implies "not numeric" and "longer
                    # than one character" for every entry in the list.
                    if key not in ingredient_ids:
                        continue
                    all_ingredients.append({
                        "food_id": r["id"],
                        "key": key,
                        "ing_id": ing_stid,
                        "food_name": r["title"],
                        "dupl": key in seen_keys,
                    })
                    seen_keys.add(key)
                    ing_stid += 1

    print("-- food list")
    rec_stid = 100
    for r in all_recipes.values():
        print('insert into food(idx, name_source, type, image, food_key) values ({}, "{}", {}, "{}", "{}");'.format(
            int(r["id"]), r["key"] + "_name", rec_stid, "{#rroot}/img/icon.png", r["key"]))

    print("-- translations for food")
    for r in all_recipes.values():
        print('insert into translations(source, gb) values ("{}", "{}");'.format(r["key"] + "_name", r["title"]))

    print("-- tags of ingredients to food")
    # food_id is the string taken from the book, so this is a string sort.
    all_ingredients = sorted(all_ingredients, key=lambda k: k['food_id'])
    inserted_ingrs = set()
    for entry in all_ingredients:
        marker = (entry["key"], entry["food_id"])
        if marker in inserted_ingrs:
            continue
        inserted_ingrs.add(marker)
        print("insert into tags(food_id, ingredient_id) values ({}, {}); -- {} / {}".format(
            entry["food_id"], ingredient_ids[entry["key"]], entry["key"], entry["food_name"]))


def make_key(recipename):
    """Return the food key derived from a recipe title.

    The title is lower-cased, transliterated with ``unidecode``, spaces and
    hyphens become underscores, and single/double quotes are removed.
    """
    key = unidecode.unidecode(recipename.lower())
    return key.translate(_KEY_TABLE)


def _strip_bullet(line):
    """Return an ingredient line without its leading "o " bullet."""
    return line.strip().replace("o ", "", 1).strip()


def _skip_parenthesised_block(source):
    """Consume lines up to a lone ")" and the line that follows it.

    Returns False when the file ends before such a line is found.
    """
    for line in iter(source.readline, ""):
        if line.strip() == ")":
            # skip the upcoming empty line
            source.readline()
            return True
    return False


def _read_intro(source):
    """Read the intro paragraph, which ends at the first empty line.

    Returns the stripped lines joined with (and followed by) single spaces,
    or None when the file ends before the terminating empty line.
    """
    pieces = []
    for line in iter(source.readline, ""):
        if line == "\n":
            return "".join(pieces)
        pieces.append(line.strip() + " ")
    return None


def _read_ingredients(source):
    """Read the ingredient groups up to the next empty line (or end of file).

    Lines starting with "o " are items; any other line is a group heading.
    Items seen before the first heading are attached to that first heading.
    Returns ``(groups, last_heading)`` where ``groups`` maps heading -> items.
    """
    ingredients = {}
    current_key = ""
    current_items = []
    for line in iter(source.readline, ""):
        if line == "\n":
            break
        if line.startswith("o "):
            current_items.append(_strip_bullet(line))
        elif not current_key:
            current_key = line.strip()
        else:
            ingredients[current_key] = current_items
            current_items = []
            current_key = line.strip()
    # Store the last group even if the file ended without an empty line.
    ingredients[current_key] = current_items
    return ingredients, current_key


def _read_body(source, ingredients, current_key):
    """Read the preparation text and the tips of one recipe.

    Stops at end of file or at the next recipe header, which is pushed back
    so the caller sees it again.  Stray "o " lines are appended to the last
    ingredient group.  Returns ``(recipe_lines, tips_lines)``; ``tips_lines``
    starts with the "Tips" marker line when the recipe has a tips section.
    """
    recipe = []
    tips_work = []
    destination = recipe
    while True:
        pos = source.tell()
        line = source.readline()
        if not line:
            break

        trimmed = line.strip()

        # did we cross into the next recipe?
        if _RECIPE_HEADER_RE.match(trimmed):
            source.seek(pos)
            break

        # line number?
        # NOTE(review): this drops every line that merely STARTS with a
        # digit, not only all-digit lines - confirm that is intended.
        if _LEADING_DIGITS_RE.match(trimmed):
            continue

        # empty line?
        if not trimmed:
            continue

        # remaining from the ingredients?
        if line.startswith("o "):
            ingredients.setdefault(current_key, []).append(_strip_bullet(line))
            continue

        if trimmed == "Tips":
            destination = tips_work

        destination.append(trimmed)
    return recipe, tips_work


def _collect_tips(tips_work):
    """Merge raw tips lines into one string per tip.

    A line consisting only of a bullet ('•' or '-') separates two tips; the
    "Tips" marker and empty lines are ignored.
    """
    tips = []
    fragments = []
    for line in tips_work:
        if not line.strip() or line == "Tips":
            continue
        if line in ('•', "-"):
            if fragments:
                tips.append(" ".join(fragments))
                fragments = []
        else:
            fragments.append(line)
    if fragments:
        tips.append(" ".join(fragments))
    return tips


def process_file(fn):
    """Parse the recipe book ``fn`` and return its recipes keyed by number.

    The result is also stored in the module-level ``all_recipes`` because
    ingredient_extractor() reads it from there.  Every recipe is a dict with
    the keys ``id``, ``key``, ``title``, ``intro``, ``descr``, ``fullrecipe``,
    ``ingredients`` (heading -> list of items) and ``tips`` (list of strings).
    A recipe that is cut off by the end of the file before its intro is
    complete is discarded.
    """
    global all_recipes
    with open(fn, "r", encoding="utf-8") as source:
        # contains a recipe mapped to a number, should be the same in all languages
        all_recipes = {}
        for line in iter(source.readline, ""):
            header = line.strip()
            if not _RECIPE_HEADER_RE.match(header):
                continue

            # begin recipe: the line after the number is empty, then the title
            source.readline()
            title = source.readline().strip()
            if title.endswith("("):
                title = title.replace("(", "").strip()

            # read until we get ")" as next stripped line
            if not _skip_parenthesised_block(source):
                break

            intro = _read_intro(source)
            if intro is None:
                break

            ingredients, last_heading = _read_ingredients(source)
            recipe_lines, tips_work = _read_body(source, ingredients, last_heading)

            recipe_id = header.replace(".", "")
            all_recipes[recipe_id] = {
                "id": recipe_id,
                "key": make_key(title),
                "title": title,
                "intro": intro.strip(),
                "descr": intro.split(".")[0],
                # the recipe is kept as one line, fragments joined by spaces
                "fullrecipe": " ".join(recipe_lines),
                "ingredients": ingredients,
                "tips": _collect_tips(tips_work),
            }

    return all_recipes


def _sql_quote(text):
    """Escape ``text`` for use inside a single-quoted SQL string literal."""
    return text.replace("'", "''")


def _break_into_paragraphs(text):
    """Insert a blank line after a sentence end once a paragraph is long enough.

    A break is emitted right after a '.' when at least two sentences were seen
    since the previous break and more than 120 characters were written so far.
    """
    out = []
    sentences = 0
    for written, char in enumerate(text, start=1):
        out.append(char)
        if char == '.':
            sentences += 1
            if sentences >= 2 and written > 120:
                sentences = 0
                out.append("\n\n")
    return "".join(out)


def _write_recipe_files(recipe, key):
    """Write descr.md, intro.md and recipe.md for one recipe under temp_w/<key>/no."""
    target_dir = os.path.join("temp_w", key, "no")
    os.makedirs(target_dir, exist_ok=True)

    # descr.md
    with open(os.path.join(target_dir, "descr.md"), "w", encoding="utf-8") as descr_f:
        descr_f.write(recipe["descr"] + ".")

    # intro.md
    with open(os.path.join(target_dir, "intro.md"), "w", encoding="utf-8") as intro_f:
        intro_f.write(recipe["intro"])

    # recipe.md
    with open(os.path.join(target_dir, "recipe.md"), "w", encoding="utf-8") as recipe_f:
        # ingredients
        for heading, items in recipe["ingredients"].items():
            recipe_f.write("\n### " + heading + "\n")
            for item in items:
                recipe_f.write("- " + item + "\n")

        # the recipe itself
        recipe_f.write("\n### Slik gjør du det\n")
        recipe_f.write(_break_into_paragraphs(recipe["fullrecipe"]))

        # tips
        tips = recipe.get("tips")
        if tips:
            recipe_f.write("\nTips\n")
            for tip in tips:
                recipe_f.write("- " + tip + "\n")


# ingredient_extractor() is not part of the default run; call it after
# process_file() to print the insert statements instead.

files = ["book2-lin-compressed.txt"]

list_all_recipes = [process_file(file) for file in files]

# Always start from an empty output directory.
if os.path.exists("temp_w"):
    shutil.rmtree("temp_w")
os.mkdir("temp_w")

# Flatten the list of single-entry dicts; the first entry for an id wins.
_recipe_key_by_id = {}
for rec_id_key in all_recipe_keys:
    for rec_id, rec_key in rec_id_key.items():
        _recipe_key_by_id.setdefault(rec_id, rec_key)

for rl in list_all_recipes:
    for r in rl.values():
        key = _recipe_key_by_id.get(int(r["id"]))
        if not key:
            pprint.pprint(r)
            print("No key found for this above")
            sys.exit(3)

        print("update translations set no='{}' where source='{}_name';".format(_sql_quote(r["title"]), key))

        if WRITE_RECIPE_FILES:
            _write_recipe_files(r, key)