{ "cells": [ { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import time\n", "def measure(f):\n", " start = time.time()\n", " f()\n", " end = time.time()\n", " return end-start" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Dynamic Programming Examples\n", "\n", "## Editing Distance\n", "\n", "### Recursive" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def lev_r(x,n,y,m):\n", " \"\"\"\n", " recursive distance function that computes the edit distance between strings x and y\n", " \"\"\"\n", " if n == 0:\n", " return m\n", " if m == 0:\n", " return n\n", " l1 = lev_r(x,n-1,y,m)+1\n", " l2 = lev_r(x,n,y,m-1)+1\n", " l3 = lev_r(x,n-1,y,m-1) + (0 if x[n-1]==y[m-1] else 1)\n", " return min(l1,l2,l3)\n", "\n", "def lev_R(x,y):\n", " return lev_r(x,len(x),y,len(y))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(lev_R(\"ETH\",\"EPFL\"))\n", "print(lev_R(\"ETHZurich\",\"EPFLausanne\"))\n", "print(measure(lambda: lev_R(\"ETHZurich\",\"EPFLausanne\")))\n", "print(lev_R(\"ZIEGE\",\"TIGER\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Memoization" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def lev_m(x,n,y,m,d):\n", " if (n,m) in d:\n", " return d[(n,m)]\n", " else:\n", " if n == 0:\n", " return m\n", " if m == 0:\n", " return n\n", " l1 = lev_m(x,n-1,y,m,d)+1\n", " l2 = lev_m(x,n,y,m-1,d)+1\n", " l3 = lev_m(x,n-1,y,m-1,d) + (0 if x[n-1]==y[m-1] else 1)\n", " d[(n,m)] = min(l1,l2,l3)\n", " return d[(n,m)]\n", "\n", "def lev_M(x,y):\n", " return lev_m(x,len(x),y,len(y),{})" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(lev_M(\"ETH\",\"EPFL\"))\n", "print(lev_M(\"ETHZurich\",\"EPFLausanne\"))\n", "print(measure(lambda: lev_M(\"ETHZurich\",\"EPFLausanne\")))\n", "print(lev_M(\"ZIEGE\",\"TIGER\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Table-based approach" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def lev_t(x,y):\n", " N = len(x)+1\n", " M = len(y)+1\n", " d = [[0]*M for i in range(0,N)]\n", " # do not use [[0]*N]*M because this creates a matrix referencing a single row!\n", " for n in range (0,N):\n", " for m in range (0,M):\n", " if n == 0:\n", " d[n][m] = m\n", " elif m == 0:\n", " d[n][m] = n\n", " else:\n", " l1 = d[n-1][m]+1\n", " l2 = d[n][m-1]+1\n", " l3 = d[n-1][m-1]+(0 if x[n-1]==y[m-1] else 1)\n", " d[n][m] = min(l1,l2,l3)\n", " return d\n", "\n", "def lev_T(x,y):\n", " d = lev_t(x,y)\n", " return d[len(x)][len(y)]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(lev_T(\"ETH\",\"EPFL\"))\n", "print(lev_T(\"ETHZurich\",\"EPFLausanne\"))\n", "print(measure(lambda: lev_T(\"ETHZurich\",\"EPFLausanne\")))\n", "print(lev_T(\"ZIEGE\",\"TIGER\"))\n", "print(lev_t(\"FISCH\",\"FROSCH\"))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Which path" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def insert(string,index,c):\n", " return string[:index] + c + string[index:]\n", "def remove(string,index):\n", " return string[:index] + string[index+1:]\n", "def replace(string,index,c):\n", " return string[:index] + c + string[index+1:]\n", "\n", "def edit(x,y):\n", " d = lev_t(x,y)\n", " word = x\n", " # reconstruct\n", " n = len(x)\n", " m = len(y)\n", " while n+m > 0:\n", " if n>0 and d[n][m] == d[n-1][m]+1:\n", " print(\"remove: \", x[n-1])\n", " word = remove(word,n-1)\n", " print(word)\n", " n = n-1\n", " elif m>0 and d[n][m] == d[n][m-1]+1:\n", " print(\"insert:\", y[m-1])\n", " word = insert(word,n,y[m-1])\n", " print(word)\n", " m = m-1\n", " else:\n", " assert(d[n][m]==d[n-1][m-1]+(0 if x[n-1]==y[m-1] else 1))\n", " if (d[n][m]==d[n-1][m-1]):\n", " print(\"keep:\",x[n-1])\n", " else:\n", " print(\"replace:\",x[n-1],\" by \", y[m-1])\n", " word = replace(word,n-1,y[m-1])\n", " n = n-1\n", " m = m-1\n", " print(word)\n", " " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "edit(\"FISCH\",\"FROSCH\")\n", "#edit(\"ZIEGE\",\"TIGER\")" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 2 }