Levenshtein Distance in C

Da Bioingegneria Elettronica e Informatica.

Nicola Altini nicola.altini@poliba.it

Vitoantonio Bevilacqua vitoantonio.bevilacqua@poliba.it

Slides

Levenshtein Distance in C

Including Libraries

  1. #include <stdlib.h>
  2. #include <stdio.h>  
  3. #include <string.h>

Macros

  1. #define MAXNCHARS 50
  2. #define VERBOSE 1
  3. #define VERSION 1

Declaration of Functions

  1. void print_matrix(int **, int, int);
  2. int Levenshtein_distance(char *, char *);
  3. int Levenshtein_distance_v2(char *, char *);
  4. int minimum(int, int, int);

Main

  1. int main(void) {
  2.     FILE *query, *db;
  3.     char str1[MAXNCHARS], str2[MAXNCHARS];
  4.     int d;
  5.  
  6.     printf("VERSION: %d\n", VERSION);
  7.     printf("VERBOSE: %d\n", VERBOSE);
  8.  
  9.     query = fopen("query.fa","r");
  10.     if (query == NULL) {
  11.         printf("It is not possible to open query.fa!\n");
  12.         scanf ("%[^\n]%*c", str1);
  13.     } else {
  14.         fscanf (query, "%[^\n]%*c", str1);
  15.     }
  16.  
  17.     db = fopen("db.fna", "r");
  18.     if (db == NULL) {
  19.         printf("It is not possible to open db.fna!\n");
  20.         scanf ("%[^\n]%*c", str2);
  21.     } else {
  22.         fscanf (db, "%[^\n]%*c", str2);
  23.     }
  24.  
  25.     printf("Query:\n'%s'\n", str1);
  26.     printf("Database:\n'%s'\n", str2);
  27.  
  28.     if (VERSION == 1) {
  29.         d = Levenshtein_distance(str1,str2);
  30.     } else if (VERSION == 2) {
  31.         d = Levenshtein_distance_v2(str1,str2);
  32.     }
  33.  
  34.     printf("La distanza di Levenshtein e': %d", d);
  35.     return 0;
  36. }

Definition of Functions

  1. int Levenshtein_distance(char *x, char *y) {
  2.     int m = strlen(x);
  3.     int n = strlen(y);
  4.     printf("m = %d , n = %d\n", m, n);
  5.  
  6.     int i, j;
  7.  
  8.     int distance;
  9.  
  10.     int **d = malloc((m + 1) * sizeof(int*));
  11.  
  12.     for(i = 0; i <= m; i++)
  13.         d[i] = malloc((n + 1) * sizeof(int));
  14.  
  15.     for(i = 0; i <= m; i++)
  16.         d[i][0] = i;
  17.  
  18.     for(j = 1; j <= n; j++)
  19.         d[0][j] = j;
  20.  
  21.     if (VERBOSE) print_matrix(d, m, n);
  22.  
  23.     for(i = 1; i <= m; i++) {
  24.         for(j = 1; j <= n; j++) {
  25.             if(x[i - 1] != y[j - 1]) {
  26.                 int k = minimum(
  27.                     d[i][j - 1],
  28.                     d[i - 1][j],
  29.                     d[i - 1][j - 1]
  30.                 );
  31.                 d[i][j] = k + 1;
  32.             } else {
  33.                 d[i][j] = d[i - 1][j - 1];
  34.             }
  35.             if (VERBOSE) print_matrix(d, m, n);
  36.         }
  37.     }
  38.  
  39.     distance = d[m][n];
  40.  
  41.     for(i = 0; i <= m; i++)
  42.         free(d[i]);
  43.  
  44.     free(d);
  45.     return distance;
  46. }
  47.  
  48. void print_matrix(int **matrix, int m, int n) {
  49.     int i, j;
  50.  
  51.     for (i = 0; i <= m; i++) {
  52.         for (j = 0; j <= n; j++) {
  53.             printf("%d ", matrix[i][j]);
  54.         }
  55.         printf("\n");
  56.     }
  57.     printf("\n\n");
  58. }
  59.  
  60. int Levenshtein_distance_v2(char *x, char *y) { 
  61.     int m = strlen(x);
  62.     int n = strlen(y);
  63.     int i, j;
  64.     int distance;
  65.     int *prev = malloc((n + 1) * sizeof(int));
  66.     int *curr = malloc((n + 1) * sizeof(int));
  67.     int *tmp = 0;
  68.     for(i = 0; i <= n; i++)
  69.         prev[i] = i;
  70.     for(i = 1; i <= m; i++) {
  71.         curr[0] = i;
  72.         for(j = 1; j <= n; j++) {
  73.             if(x[i - 1] != y[j - 1]) {
  74.                 int k = minimum(curr[j - 1], prev[j - 1], prev[j]);
  75.                 curr[j] = k + 1;
  76.                 } else {
  77.                     curr[j] = prev[j - 1];
  78.             }
  79.         }
  80.         tmp = prev;
  81.         prev = curr;
  82.         curr = tmp;
  83.         memset((void*) curr, 0, sizeof(int) * (n + 1)); 
  84.     }
  85.     distance = prev[n];
  86.     free(curr);
  87.     free(prev);
  88.     return distance;
  89. }
  90.  
  91. int minimum(int a, int b, int c) {
  92.  
  93. /* funzione che calcola il minimo di 3 valori */
  94.  
  95.     int min = a;
  96.  
  97.     if (b < min) min = b;
  98.     if (c < min) min = c;
  99.  
  100.     return min;
  101. }