Firebird: Levenshtein-Distanz
3. Code Generator
Hier kann der SQL-Quelltext zur Berechnung der Levenshtein-Distanz nach eigenen Bedürfnissen zusammengestellt werden. Die einzelnen Bezeichner werden auf Länge und doppelte Namen geprüft. Zu lange Bezeichner werden gegebenenfalls auf 31 Zeichen gekürzt. Weitere Prüfungen, insbesondere die Fehlerfreie Ausführung des SQL-Codes, werden nicht vorgenommen. Der generierte SQL-Quelltext sollte auf jeden Fall in einer Testdatenbank validiert werden, bevor er in eine vorhanden Datenbank aufgenommen wird.generierter Code
SET SQL DIALECT 3;
/******************************************************************************/
/**** Domains ****/
/******************************************************************************/
CREATE DOMAIN "dom_BOOL" AS
SMALLINT
NOT NULL
CHECK (VALUE IN (0, 1));
CREATE DOMAIN "dom_INT32" AS
INTEGER;
CREATE DOMAIN "dom_SingleChar" AS
CHAR(1) CHARACTER SET UTF8;
CREATE DOMAIN "dom_Word" AS
VARCHAR(1021) CHARACTER SET UTF8
NOT NULL;
/******************************************************************************/
/**** Stored Procedures ****/
/******************************************************************************/
SET TERM ^ ;
CREATE PROCEDURE "prc_levenshtein" (
WORD1 TYPE OF "dom_Word",
WORD2 TYPE OF "dom_Word",
USE_DAMERAU TYPE OF "dom_BOOL" = 0)
RETURNS (
DISTANCE TYPE OF "dom_INT32")
AS
BEGIN
SUSPEND;
END^
SET TERM ; ^
/******************************************************************************/
/**** Tables ****/
/******************************************************************************/
CREATE GLOBAL TEMPORARY TABLE "_temp_levenshtein" (
I "dom_INT32" NOT NULL,
J "dom_INT32" NOT NULL,
D "dom_INT32"
) ON COMMIT PRESERVE ROWS;
/******************************************************************************/
/**** Primary Keys ****/
/******************************************************************************/
ALTER TABLE "_temp_levenshtein" ADD CONSTRAINT "PK_temp_levenshtein" PRIMARY KEY (I, J);
/******************************************************************************/
/**** Stored Procedures ****/
/******************************************************************************/
SET TERM ^ ;
ALTER PROCEDURE "prc_levenshtein" (
WORD1 TYPE OF "dom_Word",
WORD2 TYPE OF "dom_Word",
USE_DAMERAU TYPE OF "dom_BOOL" = 0)
RETURNS (
DISTANCE TYPE OF "dom_INT32")
AS
DECLARE VARIABLE LEN1 TYPE OF "dom_INT32";
DECLARE VARIABLE LEN2 TYPE OF "dom_INT32";
DECLARE VARIABLE I TYPE OF "dom_INT32";
DECLARE VARIABLE J TYPE OF "dom_INT32";
DECLARE VARIABLE COST TYPE OF "dom_INT32";
DECLARE VARIABLE CHAR_I TYPE OF "dom_SingleChar";
DECLARE VARIABLE CHAR_J TYPE OF "dom_SingleChar";
BEGIN
/*******************************************************************************/
/* Damerau-Levenshtein-Distanz bei Wikipedia */
/* http://de.wikipedia.org/wiki/Levenstein-Distanz#Damerau-Levenshtein-Distanz */
/*******************************************************************************/
/* Wortlängen zwischenspeichern */
LEN1 = COALESCE(CHAR_LENGTH(WORD1), 0);
LEN2 = COALESCE(CHAR_LENGTH(WORD2), 0);
/* Array initialisieren */
I = 0;
WHILE (I <= LEN1)
DO
BEGIN
/* eintragen */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(:I, 0, :I)
MATCHING
(I, J);
/* nächste Runde */
I = I + 1;
END
J = 0;
WHILE (J <= LEN2)
DO
BEGIN
/* eintragen */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(0, :J, :J3)
MATCHING
(I, J);
/* nächste Runde */
J = J + 1;
END
/* Berechnung */
I = 1;
WHILE (I <= LEN1)
DO
BEGIN
CHAR_I = SUBSTRING(WORD1 FROM I FOR 1);
J = 1;
WHILE (J <= LEN2)
DO
BEGIN
CHAR_J = SUBSTRING(WORD2 FROM J FOR 1);
IF (CHAR_I = CHAR_J)
THEN
COST = 0;
ELSE
COST = 1;
IF (
(USE_DAMERAU = 1)
AND
(I > 1)
AND
(J > 1)
AND
(CHAR_I = SUBSTRING(WORD2 FROM J - 1 FOR 1))
AND
(SUBSTRING(WORD1 FROM I - 1 FOR 1) = CHAR_J)
)
THEN /* Damerau-Levenshtein-Distanz */
BEGIN
IF (COST = 1)
THEN /* COST = 1 */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(
:I, :J,
(
SELECT MIN(D) + 1
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J )
OR (I = :I AND J = :J - 1)
OR (I = :I - 1 AND J = :J - 1)
OR (I = :I - 2 AND J = :J - 2)
)
)
MATCHING
(I, J);
ELSE /* COST = 0 */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(
:I, :J,
(
SELECT MIN(D) FROM
(
SELECT D + 1 AS D
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J )
OR (I = :I AND J = :J - 1)
UNION ALL
SELECT D
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J - 1)
OR (I = :I - 2 AND J = :J - 2)
)
)
)
MATCHING
(I, J);
END
ELSE /* Levenshtein-Distanz */
BEGIN
IF (COST = 1)
THEN /* COST = 1 */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(
:I, :J,
(
SELECT MIN(D) + 1
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J )
OR (I = :I AND J = :J - 1)
OR (I = :I - 1 AND J = :J - 1)
)
)
MATCHING
(I, J);
ELSE /* COST = 0 */
UPDATE OR INSERT INTO "_temp_levenshtein"
(I, J, D)
VALUES
(
:I, :J,
(
SELECT MIN(D) FROM
(
SELECT D + 1 AS D
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J )
OR (I = :I AND J = :J - 1)
UNION ALL
SELECT D
FROM "_temp_levenshtein"
WHERE (I = :I - 1 AND J = :J - 1)
)
)
)
MATCHING
(I, J);
END
/* nächste Runde */
J = J + 1;
END
/* nächste Runde */
I = I + 1;
END
/* Ergebnis */
SELECT D
FROM "_temp_levenshtein"
WHERE I = :LEN1 AND J = :LEN2
INTO :DISTANCE;
END^
SET TERM ; ^
/******************************************************************************/
/**** Descriptions ****/
/******************************************************************************/
UPDATE RDB$FIELDS
SET RDB$DESCRIPTION = 'Boolescher Wert'
WHERE (RDB$FIELD_NAME = 'dom_BOOL');
UPDATE RDB$FIELDS
SET RDB$DESCRIPTION = 'Ganzzahl mit Vorzeichen 32 bit'
WHERE (RDB$FIELD_NAME = 'dom_INT32');
UPDATE RDB$FIELDS
SET RDB$DESCRIPTION = 'Einzelzeichen'
WHERE (RDB$FIELD_NAME = 'dom_SingleChar');
UPDATE RDB$FIELDS
SET RDB$DESCRIPTION = 'Wortfeld'
WHERE (RDB$FIELD_NAME = 'dom_Word');
COMMIT WORK;
/******************************************************************************/
/**** Privileges ****/
/******************************************************************************/
/* Privileges of users */
GRANT ALL ON RDB$FORMATS TO DBUSER WITH GRANT OPTION;
GRANT ALL ON RDB$PAGES TO DBUSER WITH GRANT OPTION;
GRANT ALL ON RDB$ROLES TO DBUSER WITH GRANT OPTION;
GRANT ALL ON "_temp_levenshtein" TO DBUSER WITH GRANT OPTION;
GRANT EXECUTE ON PROCEDURE "prc_levenshtein" TO DBUSER;
GRANT SELECT ON RDB$FORMATS TO PUBLIC;
GRANT SELECT ON RDB$PAGES TO PUBLIC;
GRANT SELECT ON RDB$ROLES TO PUBLIC;
/* Privileges of procedures */
GRANT ALL ON "_temp_levenshtein" TO PROCEDURE "prc_levenshtein";