{"id":206066,"date":"2013-12-17T08:23:03","date_gmt":"2013-12-17T04:23:03","guid":{"rendered":"http:\/\/savepearlharbor.com\/?p=206066"},"modified":"-0001-11-30T00:00:00","modified_gmt":"-0001-11-29T21:00:00","slug":"","status":"publish","type":"post","link":"https:\/\/savepearlharbor.com\/?p=206066","title":{"rendered":"<span class=\"post_title\">\u041d\u0435\u0447\u0435\u0442\u043a\u0438\u0439 \u0434\u0438\u043d\u0430\u043c\u0438\u0447\u0435\u0441\u043a\u0438\u0439 \u0442\u0435\u043a\u0441\u0442\u043e\u0432\u044b\u0439 \u043f\u043e\u0438\u0441\u043a? \u041d\u0435 \u0442\u0430\u043a \u0443\u0436 \u0438 \u0441\u0442\u0440\u0430\u0448\u043d\u043e<\/span>"},"content":{"rendered":"<div class=\"content html_format\">   \t<img decoding=\"async\" src=\"http:\/\/habr.habrastorage.org\/post_images\/132\/9fd\/1f6\/1329fd1f6566c35ae7238856584e78f5.jpg\" alt=\"\u0412\u043b\u0430\u0434\u0438\u043c\u0438\u0440 \u0420\u0443\u043c\u044f\u043d\u0446\u0435\u0432 - \u043f\u0440\u0438\u043a\u043b\u044e\u0447\u0435\u043d\u0438\u044f \u041f\u0438\u0442\u0435\u0440\u0441\u043a\u043e\u0433\u043e... \u043a\u043e\u0442\u0430\"\/><br \/>  \u0421\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442 \u0443\u0441\u0442\u043e\u0439\u0447\u0438\u0432\u043e\u0435 \u043c\u043d\u0435\u043d\u0438\u0435, \u0447\u0442\u043e \u043d\u0435\u0447\u0435\u0442\u043a\u0438\u0439 \u043f\u043e\u0438\u0441\u043a \u0432 \u0434\u0438\u043d\u0430\u043c\u0438\u043a\u0435 (\u043e\u043d\u043b\u0430\u0439\u043d) <br \/>  \u043c\u0430\u043b\u043e\u0434\u043e\u0441\u0442\u0443\u043f\u0435\u043d \u0432 \u0441\u0438\u043b\u0443 \u0441\u0432\u043e\u0435\u0439 \u043d\u0435\u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e\u0439 \u0441\u043b\u043e\u0436\u043d\u043e\u0441\u0442\u0438. 
<br \/>  \u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0440\u0430\u0437\u0432\u0435\u0438\u0432\u0430\u0442\u044c \u044d\u0442\u043e \u0434\u043e\u0441\u0430\u0434\u043d\u043e\u0435 \u0437\u0430\u0431\u043b\u0443\u0436\u0434\u0435\u043d\u0438\u0435 \u0438 \u043f\u043e\u043a\u0430\u0436\u0435\u043c, <br \/>  \u0447\u0442\u043e \u043f\u043e\u0441\u0442\u0440\u043e\u0438\u0442\u044c \u0441\u0432\u043e\u044e \u0441\u043e\u0431\u0441\u0442\u0432\u0435\u043d\u043d\u0443\u044e \u043f\u043e\u0438\u0441\u043a\u043e\u0432\u0443\u044e \u0441\u0438\u0441\u0442\u0435\u043c\u0443 \u0441\u043e \u0441\u043d\u043e\u0441\u043d\u043e\u0439 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e<br \/>   \u043d\u0430 \u043d\u0435 \u0442\u0430\u043a\u0438\u0445 \u0443\u0436 \u0438 \u043c\u0430\u043b\u0435\u043d\u044c\u043a\u0438\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u0441\u0442\u0443\u043f\u043d\u043e \u043a\u0430\u0436\u0434\u043e\u043c\u0443.<br \/>  <a name=\"habracut\"><\/a><br \/>  \u041e\u0441\u043d\u043e\u0432\u043d\u044b\u0435 \u0438\u0434\u0435\u0438 \u0442\u0430\u043a\u043e\u0432\u044b:  <\/p>\n<ul>\n<li>\u0414\u043b\u044f \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u043e\u0433\u043e \u043f\u043e\u0438\u0441\u043a\u0430 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b<\/li>\n<li>\u0425\u0440\u0430\u043d\u0435\u043d\u0438\u0435 \u0434\u0430\u043d\u043d\u044b\u0445 \u043c\u044b \u0434\u043e\u0432\u0435\u0440\u0438\u043c \u0421\u0423\u0411\u0414<\/li>\n<li>\u0414\u043b\u044f \u043f\u043e\u0432\u044b\u0448\u0435\u043d\u0438\u044f \u0441\u043a\u043e\u0440\u043e\u0441\u0442\u0438 \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u043e\u0433\u043e \u043f\u043e\u0438\u0441\u043a\u0430, \u0441\u043b\u043e\u0432\u0430\u0440\u044c \u0432\u0441\u0435\u0433\u0434\u0430 \u043d\u0430\u0445\u043e\u0434\u0438\u0442\u0441\u044f 
\u0432 \u043f\u0430\u043c\u044f\u0442\u0438<\/li>\n<li>\u0414\u043b\u044f \u043f\u043e\u0434\u0434\u0435\u0440\u0436\u0430\u043d\u0438\u044f \u0441\u043b\u043e\u0432\u0430\u0440\u044f \u0432 \u0430\u043a\u0442\u0443\u0430\u043b\u044c\u043d\u043e\u043c \u0441\u043e\u0441\u0442\u043e\u044f\u043d\u0438\u0438, \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u043c \u0442\u0440\u0438\u0433\u0433\u0435\u0440\u044b<\/li>\n<\/ul>\n<p>  <b>\u0422\u0435\u0441\u0442\u043e\u0432\u044b\u0435 \u0434\u0430\u043d\u043d\u044b\u0435.<\/b><br \/>  \u0414\u043b\u044f \u043e\u043f\u044b\u0442\u043e\u0432 \u043c\u044b \u0432\u043e\u0437\u044c\u043c\u0435\u043c \u043d\u0435\u043c\u043d\u043e\u0433\u043e \u041f\u0443\u0448\u043a\u0438\u043d\u0430 \u0438 \u0414\u043e\u0441\u0442\u043e\u0435\u0432\u0441\u043a\u043e\u0433\u043e, \u00ab\u0411\u043e\u0436\u0435\u0441\u0442\u0432\u0435\u043d\u043d\u0443\u044e \u041a\u043e\u043c\u0435\u0434\u0438\u044e\u00bb \u0410\u043b\u0438\u0433\u044c\u0435\u0440\u0438 \u0430 \u0442\u0430\u043a\u0436\u0435 \u00ab\u0412\u043e\u0439\u043d\u0443 \u0438 \u043c\u0438\u0440\u00bb \u0422\u043e\u043b\u0441\u0442\u043e\u0433\u043e \u0432 \u0430\u043d\u0433\u043b\u0438\u0439\u0441\u043a\u043e\u043c \u043f\u0435\u0440\u0435\u0432\u043e\u0434\u0435 (\u0438\u0441\u0442\u043e\u0447\u043d\u0438\u043a \u2014 <a href=\"http:\/\/www.lib.ru\">www.lib.ru<\/a> &#038; <a href=\"http:\/\/www.gutenberg.org\">www.gutenberg.org<\/a>). \u0412\u0441\u0435\u0433\u043e 18 \u043c\u0431 \u0432 \u043a\u043e\u0434\u0438\u0440\u043e\u0432\u043a\u0435 utf8.<br \/>  \u041a\u0430\u0436\u0434\u0430\u044f \u043d\u0435\u043f\u0443\u0441\u0442\u0430\u044f \u0441\u0442\u0440\u043e\u043a\u0430 \u0442\u0435\u043a\u0441\u0442\u0430 \u0441\u0442\u0430\u043d\u043e\u0432\u0438\u0442\u0441\u044f \u043e\u0434\u043d\u043e\u0439 \u0437\u0430\u043f\u0438\u0441\u044c\u044e \u0432 \u043d\u0430\u0448\u0435\u0439 \u0431\u0430\u0437\u0435. 
<br \/>  \u0415\u0441\u043b\u0438 \u0441\u0442\u0440\u043e\u043a\u0430 \u0434\u043b\u0438\u043d\u043d\u0430\u044f, \u043e\u043d\u0430 \u0440\u0430\u0437\u0431\u0438\u0432\u0430\u0435\u0442\u0441\u044f \u043f\u043e 800 \u0441\u043b\u043e\u0432.<br \/>  \u0412\u0441\u0435\u0433\u043e \u0432\u044b\u0445\u043e\u0434\u0438\u0442 ~160 \u0442\u044b\u0441\u044f\u0447 \u0437\u0430\u043f\u0438\u0441\u0435\u0439<\/p>\n<p>  <b>\u0421\u0423\u0411\u0414<\/b><br \/>  \u041a\u0430\u043a \u0438 <a href=\"http:\/\/habrahabr.ru\/post\/196682\/\">\u0440\u0430\u043d\u044c\u0448\u0435<\/a>, \u0431\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c <a href=\"http:\/\/virtuoso.openlinksw.com\/dataspace\/doc\/dav\/wiki\/Main\/\">OpenLink Virtuoso<\/a> \u0432\u0435\u0440\u0441\u0438\u0438 7.0.0. \u041e\u0434\u043d\u0438\u043c \u043b\u0438\u0448\u044c \u0421-\u043f\u043b\u0430\u0433\u0438\u043d\u043e\u043c \u043e\u0431\u043e\u0439\u0442\u0438\u0441\u044c \u043d\u0435 \u0443\u0434\u0430\u0441\u0442\u0441\u044f, \u0442.\u043a. \u0444\u0443\u043d\u043a\u0446\u0438\u043e\u043d\u0430\u043b\u0430, \u0434\u043e\u0441\u0442\u0443\u043f\u043d\u043e\u0433\u043e \u0434\u043b\u044f \u043f\u043b\u0430\u0433\u0438\u043d\u043e\u0432 \u043d\u0435\u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e. 
\u041f\u0440\u0438\u0434\u0435\u0442\u0441\u044f \u043f\u043e\u0434\u043a\u043b\u044e\u0447\u0430\u0442\u044c \u0441\u0435\u0440\u0432\u0435\u0440 \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 OEM \u0440\u0430\u0437\u0434\u0435\u043b\u044f\u0435\u043c\u043e\u0439 \u0431\u0438\u0431\u043b\u0438\u043e\u0442\u0435\u043a\u0438 (libvirtuoso-t) \u0438 \u043f\u0440\u0438 \u044d\u0442\u043e\u043c \u043d\u0435\u043c\u043d\u043e\u0433\u043e \u043f\u043e\u043a\u043e\u043b\u0434\u043e\u0432\u0430\u0442\u044c \u0441\u043e \u0441\u043f\u0438\u0441\u043a\u043e\u043c \u044d\u043a\u0441\u043f\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0445 \u0444\u0443\u043d\u043a\u0446\u0438\u0439.<\/p>\n<p>  <b>\u0421\u0445\u0435\u043c\u0430 \u0434\u0430\u043d\u043d\u044b\u0445<\/b><br \/>  \u0412\u043e-\u043f\u0435\u0440\u0432\u044b\u0445, \u0441\u043b\u043e\u0432\u0430\u0440\u044c:<\/p>\n<pre><code class=\"sql\">create table MRC_WORDS (     WD_ID               integer,     WD_ITSELF\t    nvarchar,     WD_COUNT            integer,      \t     primary key (WD_ID)); <\/code><\/pre>\n<p>\u041a\u0430\u0436\u0434\u0430\u044f \u0437\u0430\u043f\u0438\u0441\u044c \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442 \u0441\u0430\u043c\u043e \u0441\u043b\u043e\u0432\u043e, \u0435\u0433\u043e \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440 \u0438 \u0447\u0430\u0441\u0442\u043e\u0442\u0443 \u0435\u0433\u043e \u0432\u0441\u0442\u0440\u0435\u0447\u0430\u0435\u043c\u043e\u0441\u0442\u0438. 
\u041e\u0431\u043d\u043e\u0432\u043b\u044f\u0442\u044c \u0447\u0430\u0441\u0442\u043e\u0442\u0443 \u043f\u0440\u0438 \u043a\u0430\u0436\u0434\u043e\u0439 \u0432\u0441\u0442\u0430\u0432\u043a\u0435 \u0442\u0435\u043a\u0441\u0442\u0430 \u0441\u043b\u0438\u0448\u043a\u043e\u043c \u0434\u043e\u0440\u043e\u0433\u043e, \u043f\u043e\u044d\u0442\u043e\u043c\u0443 \u043e\u043d\u0430 \u043c\u0435\u043d\u044f\u0435\u0442\u0441\u044f \u0432 \u043f\u0430\u043c\u044f\u0442\u0438 \u0438 \u0437\u0430\u043f\u0438\u0441\u044b\u0432\u0430\u0435\u0442\u0441\u044f \u043f\u0435\u0440\u0438\u043e\u0434\u0438\u0447\u0435\u0441\u043a\u0438. \u041a\u0430\u043a \u0432\u0430\u0440\u0438\u0430\u043d\u0442, \u0435\u0435 \u043c\u043e\u0436\u043d\u043e \u0430\u043a\u0442\u0443\u0430\u043b\u0438\u0437\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u00ab\u043f\u043e \u043a\u0440\u043e\u043d\u0443\u00bb. \u0427\u0430\u0441\u0442\u043e\u0442\u0430 \u044d\u0442\u0430 \u043c\u043e\u0436\u0435\u0442 \u0431\u044b\u0442\u044c \u043f\u043e\u043b\u0435\u0437\u043d\u0430 \u0434\u043b\u044f \u0440\u0430\u043d\u0436\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f, \u043d\u043e \u0441\u0435\u0439\u0447\u0430\u0441 \u043c\u044b \u0435\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u043d\u0435 \u0431\u0443\u0434\u0435\u043c.<\/p>\n<p>  \u0422\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b:<\/p>\n<pre><code class=\"sql\">create table MRC_TRIPLES (     TR_ID           integer identity,     TR_DATA         nvarchar,     TR_WORDID       integer,     primary key(TR_DATA, TR_WORDID, TR_ID)); <\/code><\/pre>\n<p>\u041a\u0430\u0436\u0434\u0430\u044f \u0437\u0430\u043f\u0438\u0441\u044c \u0441\u043e\u0434\u0435\u0440\u0436\u0438\u0442 \u0441\u0430\u043c\u0443 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u0443, \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440 \u0441\u043b\u043e\u0432\u0430, \u0438\u0437 \u043a\u043e\u0442\u043e\u0440\u043e\u0433\u043e 
\u043e\u043d\u0430 \u043f\u0440\u0438\u0448\u043b\u0430 \u0438 \u0443\u043d\u0438\u043a\u0430\u043b\u044c\u043d\u044b\u0439 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440 \u043d\u0430 \u0441\u043b\u0443\u0447\u0430\u0439, \u043a\u043e\u0433\u0434\u0430 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u0430 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0430\u0441\u044c \u0432 \u0441\u043b\u043e\u0432\u0435 \u043d\u0435\u0441\u043a\u043e\u043b\u044c\u043a\u043e \u0440\u0430\u0437 (Ex: &#8216;\u044d\u0432\u0435\u043d<i>\u043a\u0438\u0439<\/i>\u0441<i>\u043a\u0438\u0439<\/i>&#8216;)<\/p>\n<p>  \u0421\u043f\u0438\u0441\u043a\u0438 \u0432\u0441\u0442\u0440\u0435\u0447\u0430\u0435\u043c\u043e\u0441\u0442\u0438:<\/p>\n<pre><code class=\"sql\">create table MRC_DATA (     DT_WORDID        integer,     DT_OID           integer,     DT_COL           integer,     DT_POSITION      integer,     primary key(DT_WORDID, DT_OID, DT_COL, DT_POSITION)); <\/code><\/pre>\n<p>\u0417\u0434\u0435\u0441\u044c \u043c\u044b \u0445\u0440\u0430\u043d\u0438\u043c, \u0433\u0434\u0435 \u043a\u0430\u043a\u043e\u0435 \u0441\u043b\u043e\u0432\u043e \u0432 \u043a\u0430\u043a\u043e\u0439 \u043f\u043e\u0437\u0438\u0446\u0438\u0438 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u043e\u0441\u044c.<\/p>\n<p>  \u0421\u043e\u0431\u0441\u0442\u0432\u0435\u043d\u043d\u043e \u0434\u0430\u043d\u043d\u044b\u0435:<\/p>\n<pre><code class=\"sql\">create table MRC_TEXT (     TX_ID               integer identity,     TX_AUTHOR           nvarchar,     TX_SUBJ             nvarchar,     TX_STRING           nvarchar,     TX_LONG_STRING      long nvarchar,     TX_TS    timestamp,     primary key (TX_ID)); <\/code><\/pre>\n<p>\u0412\u0441\u0435 \u0434\u0430\u043d\u043d\u044b\u0435 \u0432 \u043d\u0430\u0448\u0435\u0439 \u0442\u0435\u0441\u0442\u043e\u0432\u043e\u0439 \u0437\u0430\u0434\u0430\u0447\u0435 \u0445\u0440\u0430\u043d\u044f\u0442\u0441\u044f \u0432 \u043e\u0434\u043d\u043e\u0439 
\u0442\u0430\u0431\u043b\u0438\u0446\u0435, \u0432 \u0442\u0440\u0435\u0445 \u043a\u043e\u043b\u043e\u043d\u043a\u0430\u0445 \u2014 \u0430\u0432\u0442\u043e\u0440, \u043f\u0440\u043e\u0438\u0437\u0432\u0435\u0434\u0435\u043d\u0438\u0435 \u0438 \u0442\u0435\u043a\u0441\u0442. \u0415\u0441\u043b\u0438 \u0442\u0435\u043a\u0441\u0442 \u0434\u043b\u0438\u043d\u043d\u0435\u0435 500 \u0441\u0438\u043c\u0432\u043e\u043b\u043e\u0432, \u043e\u043d \u043f\u043e\u043f\u0430\u0434\u0430\u0435\u0442 \u0432 \u0431\u043b\u043e\u0431. \u0412 \u0436\u0438\u0437\u043d\u0438 \u0442\u0435\u043a\u0441\u0442\u044b, \u043a\u043e\u043d\u0435\u0447\u043d\u043e, \u043c\u043e\u0433\u0443\u0442 \u043e\u043a\u0430\u0437\u0430\u0442\u044c\u0441\u044f \u0432 \u0440\u0430\u0437\u043d\u044b\u0445 \u0442\u0430\u0431\u043b\u0438\u0446\u0430\u0445 \u0438 \u043d\u0430\u0448 \u0438\u043d\u0434\u0435\u043a\u0441 \u043e\u043a\u0430\u0436\u0435\u0442\u0441\u044f \u043c\u043d\u043e\u0433\u043e\u0442\u0430\u0431\u043b\u0438\u0447\u043d\u044b\u043c. 
\u041a\u0430\u043a \u0441 \u044d\u0442\u0438\u043c \u0441\u043f\u0440\u0430\u0432\u043b\u044f\u0442\u044c\u0441\u044f, \u043d\u0430\u043f\u0438\u0441\u0430\u043d\u043e <a href=\"http:\/\/habrahabr.ru\/post\/201474\/\">\u0437\u0434\u0435\u0441\u044c<\/a>.<\/p>\n<p>  <b>\u0422\u0440\u0438\u0433\u0433\u0435\u0440 \u0432\u0441\u0442\u0430\u0432\u043a\u0438<\/b><br \/>  \u0412\u0441\u044e \u0438\u043d\u0434\u0435\u043a\u0441\u0430\u0446\u0438\u044e \u043c\u044b \u0441\u043f\u0440\u044f\u0447\u0435\u043c \u0432\u043d\u0443\u0442\u0440\u044c \u0442\u0440\u0438\u0433\u0433\u0435\u0440\u0430 \u043d\u0430 \u0432\u0441\u0442\u0430\u0432\u043a\u0443:  <\/p>\n<pre><code class=\"sql\">create trigger MRC_TEXT_I after insert on MRC_TEXT {     declare wordid integer;     declare str nvarchar;     str := coalesce(TX_STRING, cast(blob_to_string(TX_LONG_STRING)as nvarchar));     MRC_PROCESS_STR_I(str, TX_ID, 1);     str := TX_SUBJ;     MRC_PROCESS_STR_I(str, TX_ID, 2);     str := TX_AUTHOR;     MRC_PROCESS_STR_I(str, TX_ID, 3); }; <\/code><\/pre>\n<p>\u0422.\u0435. 
\u043c\u044b \u0442\u0440\u0438\u0436\u0434\u044b \u0432\u044b\u0437\u044b\u0432\u0430\u0435\u043c \u0444\u0443\u043d\u043a\u0446\u0438\u044e <i>MRC_PROCESS_STR_I<\/i> \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0438\u0437 \u0438\u043d\u0434\u0435\u043a\u0441\u0438\u0440\u0443\u0435\u043c\u044b\u0445 \u043f\u043e\u043b\u0435\u0439:  <\/p>\n<pre><code class=\"sql\">create procedure MRC_PROCESS_STR_I( \tin str nvarchar,  \tin oid integer,  \tin col integer) {   if (str is not null) {     declare vec any;     vec := nv_split(str);     declare n any;     declare wordid any;     if (vec &lt;&gt; 0 and vec is not null) {       n := length(vec);       declare i integer;       i := 0;       while (i &lt; n) {         wordid := treat_nword(vec[i]);         if (wordid &gt; 0) {           insert into MRC_DATA (DT_WORDID, DT_OID, DT_COL, DT_POSITION)              values (wordid, oid, col, i);         }         i := i + 1;       }     }   } }; <\/code><\/pre>\n<p>\u0417\u0434\u0435\u0441\u044c \u043c\u044b \u0440\u0430\u0441\u0449\u0435\u043f\u043b\u044f\u0435\u043c \u0441\u0442\u0440\u043e\u043a\u0443 \u043d\u0430 \u043e\u0442\u0434\u0435\u043b\u044c\u043d\u044b\u0435 \u0441\u043b\u043e\u0432\u0430 \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e \u0444\u0443\u043d\u043a\u0446\u0438\u0438 <i>nv_split<\/i>, \u043e\u0431\u0440\u0430\u0431\u0430\u0442\u044b\u0432\u0430\u0435\u043c \u043a\u0430\u0436\u0434\u043e\u0435 \u0441\u043b\u043e\u0432\u043e \u0441 \u043f\u043e\u043c\u043e\u0449\u044c\u044e <i>treat_nword<\/i> \u0438 \u0437\u0430\u043f\u0438\u0441\u044b\u0432\u0430\u0435\u043c \u0434\u0430\u043d\u043d\u044b\u0435 \u043e \u043a\u0430\u0436\u0434\u043e\u043c \u0441\u043b\u043e\u0432\u0435 \u0432 \u0442\u0430\u0431\u043b\u0438\u0446\u0443 <i>MRC_DATA<\/i>.<br \/>  \u0423\u043f\u043e\u043c\u044f\u043d\u0443\u0442\u044b\u0435 <i>nv_split<\/i> \u0438 <i>treat_nword<\/i> \u043d\u0430\u043f\u0438\u0441\u0430\u043d\u044b (\u0434\u043b\u044f 
\u044d\u0442\u043e\u0439 \u0437\u0430\u0434\u0430\u0447\u0438) \u043d\u0430 \u0421 \u0438 \u0434\u043e\u0441\u0442\u0443\u043f\u043d\u044b \u0447\u0435\u0440\u0435\u0437 \u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441 \u043f\u043b\u0430\u0433\u0438\u043d\u043e\u0432.<br \/>  \u0421 \u043f\u0435\u0440\u0432\u043e\u0439 \u0432\u0441\u0451 \u0438 \u0442\u0430\u043a \u043f\u043e\u043d\u044f\u0442\u043d\u043e, \u0430 \u0432\u0442\u043e\u0440\u0430\u044f \u0434\u043e\u043b\u0436\u043d\u0430 \u0440\u0430\u0437\u043e\u0431\u0440\u0430\u0442\u044c \u0441\u043b\u043e\u0432\u043e \u043d\u0430 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b, \u0437\u0430\u043f\u0438\u0441\u0430\u0442\u044c \u0438\u0445 \u0432 \u0441\u043e\u043e\u0442\u0432\u0435\u0442\u0441\u0442\u0432\u0443\u044e\u0449\u0443\u044e \u0442\u0430\u0431\u043b\u0438\u0446\u0443, \u0437\u0430\u043f\u0438\u0441\u0430\u0442\u044c \u0441\u043b\u043e\u0432\u043e \u0432 \u0442\u0430\u0431\u043b\u0438\u0446\u0443 \u0441\u043b\u043e\u0432 \u0438 \u043e\u0431\u043d\u043e\u0432\u0438\u0442\u044c \u0441\u043b\u043e\u0432\u0430\u0440\u044c \u0432 \u043f\u0430\u043c\u044f\u0442\u0438.<\/p>\n<p>  <b>\u0421\u043b\u043e\u0432\u0430\u0440\u044c \u0432 \u043f\u0430\u043c\u044f\u0442\u0438<\/b><br \/>  \u0421\u043e\u0441\u0442\u043e\u0438\u0442 \u0438\u0437 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0438\u0445 \u0441\u0443\u0449\u043d\u043e\u0441\u0442\u0435\u0439:  <\/p>\n<ul>\n<li><i>ht_dict_<\/i> \u2014 hash-map, \u0443\u043c\u0435\u044e\u0449\u0438\u0439 \u043f\u043e utf8 \u043f\u0440\u0435\u0434\u0441\u0442\u0430\u0432\u043b\u0435\u043d\u0438\u044e \u0441\u043b\u043e\u0432\u0430 \u0434\u043e\u0441\u0442\u0430\u0432\u0430\u0442\u044c \u0435\u0433\u043e \u043d\u043e\u043c\u0435\u0440<\/li>\n<li><i>ht_dict_by_id_<\/i> \u2014 hash-map, \u043a\u043e\u0442\u043e\u0440\u044b\u0439 \u043f\u043e \u043d\u043e\u043c\u0435\u0440\u0443 \u0441\u043b\u043e\u0432\u0430 \u0434\u043e\u0441\u0442\u0430\u0435\u0442 
\u0435\u0433\u043e \u043e\u043f\u0438\u0441\u0430\u0442\u0435\u043b\u044c<\/li>\n<li><i>ht_triples_<\/i> \u2014 hash-map, \u0432 \u043a\u043e\u0442\u043e\u0440\u043e\u043c \u043f\u043e utf8 \u0437\u043d\u0430\u0447\u0435\u043d\u0438\u044e \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b \u043c\u044b \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c \u0433\u043e\u043b\u043e\u0432\u0443 \u0441\u043f\u0438\u0441\u043a\u0430 \u0432\u0441\u0435\u0445 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432 \u0441\u043b\u043e\u0432, \u0432 \u043a\u043e\u0442\u043e\u0440\u044b\u0445 \u044d\u0442\u0430 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u0430 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0430\u0441\u044c<\/li>\n<\/ul>\n<p>\u041e\u0442\u0434\u0435\u043b\u044c\u043d\u043e \u0441\u043b\u0435\u0434\u0443\u0435\u0442 \u043e\u0442\u043c\u0435\u0442\u0438\u0442\u044c, \u0447\u0442\u043e \u043f\u0440\u0438 \u0440\u0430\u0437\u0431\u0438\u0435\u043d\u0438\u0438 \u0441\u043b\u043e\u0432\u0430 \u043d\u0430 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b \u0432 \u043d\u0430\u0447\u0430\u043b\u043e \u0438 \u043a\u043e\u043d\u0435\u0446 \u0441\u043b\u043e\u0432\u0430 \u0434\u043e\u0431\u0430\u0432\u043b\u044f\u044e\u0442\u0441\u044f \u043f\u0440\u043e\u0431\u0435\u043b\u044b, \u0447\u0442\u043e\u0431\u044b \u043e\u0431\u0435\u0441\u043f\u0435\u0447\u0438\u0442\u044c \u0442\u0430\u043a\u0438\u043c \u043e\u0431\u0440\u0430\u0437\u043e\u043c \u0431\u043e\u043d\u0443\u0441\u044b \u0437\u0430 \u043f\u0440\u0430\u0432\u0438\u043b\u044c\u043d\u043e\u0435 \u043d\u0430\u0447\u0430\u043b\u043e \u0438\/\u0438\u043b\u0438 \u043a\u043e\u043d\u0435\u0446 \u0441\u043b\u043e\u0432\u0430.<\/p>\n<p>  <b>\u0421\u043b\u043e\u0432\u0430\u0440\u043d\u044b\u0439 \u043f\u043e\u0438\u0441\u043a<\/b><br \/>  \u0420\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442\u043e\u043c \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u043e\u0433\u043e \u043f\u043e\u0438\u0441\u043a\u0430 
\u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u0441\u043f\u0438\u0441\u043e\u043a \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u043e\u0432 \u0438 \u0438\u0445 \u043f\u043e\u0445\u043e\u0436\u0435\u0441\u0442\u0438 \u043d\u0430 \u043f\u0440\u0435\u0434\u044a\u044f\u0432\u043b\u0435\u043d\u043d\u044b\u0439 \u043e\u0431\u0440\u0430\u0437\u0435\u0446.<br \/>  \u0410\u043b\u0433\u043e\u0440\u0438\u0442\u043c \u0442\u0430\u043a\u043e\u0432:<\/p>\n<ul>\n<li>\u043d\u043e\u0440\u043c\u0430\u043b\u0438\u0437\u0443\u0435\u043c \u0441\u043b\u043e\u0432\u043e, \u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440, \u043f\u0440\u0438\u0432\u043e\u0434\u0438\u043c \u043a \u0432\u0435\u0440\u0445\u043d\u0435\u043c\u0443 \u0440\u0435\u0433\u0438\u0441\u0442\u0440\u0443<\/li>\n<li>\u0434\u043e\u0431\u0430\u0432\u043b\u044f\u0435\u043c \u043f\u0440\u043e\u0431\u0435\u043b\u044b \u0432 \u043d\u0430\u0447\u0430\u043b\u043e \u0438 \u043a\u043e\u043d\u0435\u0446 \u0441\u043b\u043e\u0432\u0430 \u0438 \u0440\u0430\u0437\u0431\u0438\u0432\u0430\u0435\u043c \u0442\u043e \u0447\u0442\u043e \u043f\u043e\u043b\u0443\u0447\u0438\u043b\u043e\u0441\u044c \u043d\u0430 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b<\/li>\n<li>\u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0439 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c\u044b \u043d\u0430\u0445\u043e\u0434\u0438\u043c \u0441\u043f\u0438\u0441\u043e\u043a \u0441\u043b\u043e\u0432 \u0433\u0434\u0435 \u043e\u043d\u0430 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0430\u0441\u044c<\/li>\n<li>\u0438 \u0434\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0442\u0430\u043a\u043e\u0433\u043e \u0441\u043b\u043e\u0432\u0430 \u043d\u0430\u0440\u0430\u0449\u0438\u0432\u0430\u0435\u043c \u0441\u0447\u0435\u0442\u0447\u0438\u043a \u0441\u0441\u044b\u043b\u043e\u043a<\/li>\n<li>\u043f\u043e\u0441\u043b\u0435 \u043e\u0431\u0440\u0430\u0431\u043e\u0442\u043a\u0438 \u0442\u0440\u0438\u0433\u0440\u0430\u043c\u043c \u043c\u044b 
\u043e\u0441\u0442\u0430\u0432\u043b\u044f\u0435\u043c \u0442\u043e\u043b\u044c\u043a\u043e \u0441\u043b\u043e\u0432\u0430, \u0434\u043b\u044f \u043a\u043e\u0442\u043e\u0440\u044b\u0445 \u0447\u0438\u0441\u043b\u043e \u0441\u0441\u044b\u043b\u043e\u043a \u0432\u044b\u0448\u0435 \u043d\u0435\u043a\u043e\u0442\u043e\u0440\u043e\u0433\u043e \u043f\u043e\u0440\u043e\u0433\u0430, <br \/>  \u043f\u043e\u0440\u043e\u0433\u043e\u043c \u0432 \u0434\u0430\u043d\u043d\u043e\u043c \u0441\u043b\u0443\u0447\u0430\u0435 \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043f\u043e\u043b\u043e\u0432\u0438\u043d\u0430 \u043e\u0442 \u043c\u0430\u043a\u0441\u0438\u043c\u0430\u043b\u044c\u043d\u043e\u0433\u043e \u043a-\u0432\u0430 \u0441\u0441\u044b\u043b\u043e\u043a + 1<\/li>\n<li>\u0434\u043b\u044f \u0432\u0441\u0435\u0445 \u043e\u0441\u0442\u0430\u0432\u0448\u0438\u0445\u0441\u044f \u0441\u043b\u043e\u0432 \u0432\u044b\u0447\u0438\u0441\u043b\u044f\u0435\u043c \u0438\u0445 \u043f\u043e\u0445\u043e\u0436\u0435\u0441\u0442\u044c \u043d\u0430 \u0438\u0441\u0445\u043e\u0434\u043d\u043e\u0435 \u0441\u043b\u043e\u0432\u043e, \u0432 \u0434\u0430\u043d\u043d\u043e\u043c \u0441\u043b\u0443\u0447\u0430\u0435 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u0443\u0435\u0442\u0441\u044f \u0440\u0430\u0441\u0441\u0442\u043e\u044f\u043d\u0438\u0435 \u041b\u0435\u0432\u0435\u043d\u0448\u0442\u0435\u0439\u043d\u0430 \u0441 \u0434\u043e\u043f\u043e\u043b\u043d\u0438\u0442\u0435\u043b\u044c\u043d\u044b\u043c \u0431\u043e\u043d\u0443\u0441\u043e\u043c \u0437\u0430 \u043f\u0440\u0430\u0432\u0438\u043b\u044c\u043d\u043e\u0435 \u043d\u0430\u0447\u0430\u043b\u043e \u0441\u043b\u043e\u0432\u0430<\/li>\n<li>\u0441\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u0441\u043f\u0438\u0441\u043e\u043a \u0441\u043b\u043e\u0432 \u043f\u043e \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u0430\u043c<\/li>\n<\/ul>\n<p>  
<b>\u0421\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u043d\u044b\u0439 \u043f\u043e\u0438\u0441\u043a<\/b><br \/>  \u0417\u0430\u0434\u0430\u0447\u0430 \u0441\u0442\u0440\u0443\u043a\u0442\u0443\u0440\u043d\u043e\u0433\u043e \u043f\u043e\u0438\u0441\u043a\u0430 \u0432 \u043d\u0430\u0448\u0435\u043c \u0441\u043b\u0443\u0447\u0430\u0435 \u2014 \u0438\u0441\u0445\u043e\u0434\u044f \u0438\u0437 \u0441\u043f\u0438\u0441\u043a\u043e\u0432 \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u043e\u0432, \u0432\u044b\u0434\u0430\u043d\u043d\u044b\u0445 \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u044b\u043c \u043f\u043e\u0438\u0441\u043a\u043e\u043c, \u0441\u0444\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u0441\u043f\u0438\u0441\u043e\u043a \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432 \u0437\u0430\u043f\u0438\u0441\u0435\u0439, \u0432 \u043a\u043e\u0442\u043e\u0440\u044b\u0445 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0438\u0441\u044c \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u044b \u0432\u0441\u0435\u0445 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u0441\u043b\u043e\u0432.<br \/>  \u041f\u043e\u043a\u0430 \u0434\u0430\u043d\u043d\u044b\u0445 \u043e\u0442\u043d\u043e\u0441\u0438\u0442\u0435\u043b\u044c\u043d\u043e \u043d\u0435\u043c\u043d\u043e\u0433\u043e, \u043a\u0430\u043a, \u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440, \u0443 \u043d\u0430\u0441, \u043c\u043e\u0436\u043d\u043e \u043e\u0441\u043e\u0431\u043e \u043d\u0435 \u0437\u0430\u0431\u043e\u0442\u0438\u0442\u044c\u0441\u044f \u043e \u043a\u043e\u043b\u0438\u0447\u0435\u0441\u0442\u0432\u0435 \u0432\u044b\u0434\u0435\u043b\u044f\u0435\u043c\u043e\u0439 \u043f\u0430\u043c\u044f\u0442\u0438 \u0438 \u043f\u0440\u043e\u0441\u0442\u043e \u0437\u0430\u0433\u0440\u0443\u0437\u0438\u0442\u044c \u0432\u0441\u0435 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b \u0441\u0442\u0440\u043e\u043a 
\u0434\u043b\u044f \u0438\u043d\u0442\u0435\u0440\u0435\u0441\u0443\u044e\u0449\u0438\u0445 \u043d\u0430\u0441 \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u044b\u0445 \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u043e\u0432. \u0418\u0442\u0430\u043a:  <\/p>\n<ul>\n<li>\u0414\u043b\u044f \u043a\u0430\u0436\u0434\u043e\u0433\u043e \u0441\u043b\u043e\u0432\u0430 \u0437\u0430\u043f\u0440\u043e\u0441\u0430 \u0443 \u043d\u0430\u0441 \u0435\u0441\u0442\u044c \u0441\u043f\u0438\u0441\u043e\u043a \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u043e\u0432<\/li>\n<li>\u043d\u0430\u0445\u043e\u0434\u0438\u043c \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b \u0432\u0441\u0435\u0445 \u0437\u0430\u043f\u0438\u0441\u0435\u0439, \u0433\u0434\u0435 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0438\u0441\u044c \u044d\u0442\u0438 \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442\u044b \u0438 \u0437\u0430\u043d\u043e\u0441\u0438\u043c \u0432 \u0441\u043f\u0438\u0441\u043e\u043a<\/li>\n<li>\u0441\u043e\u0440\u0442\u0438\u0440\u0443\u0435\u043c \u0441\u043f\u0438\u0441\u043e\u043a<\/li>\n<li>\u043f\u0435\u0440\u0435\u0441\u0435\u043a\u0430\u0435\u043c \u0441\u043f\u0438\u0441\u043a\u0438 \u0432\u0441\u0435\u0445 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0445 \u0441\u043b\u043e\u0432 \u0438 \u043f\u043e\u043b\u0443\u0447\u0430\u0435\u043c \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442 \u2014 \u0441\u043f\u0438\u0441\u043e\u043a \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432 \u0437\u0430\u043f\u0438\u0441\u0435\u0439, \u0433\u0434\u0435 \u0432\u0441\u0442\u0440\u0435\u0442\u0438\u043b\u0438\u0441\u044c \u0432\u0441\u0435 \u0438\u0441\u0445\u043e\u0434\u043d\u044b\u0435 \u0441\u043b\u043e\u0432\u0430<\/li>\n<\/ul>\n<p>\u041f\u0440\u0435\u0434\u043f\u043e\u043b\u043e\u0436\u0438\u043c, \u0434\u0430\u043d\u043d\u044b\u0445 \u0441\u0442\u0430\u043b\u043e 
\u0441\u043b\u0438\u0448\u043a\u043e\u043c \u043c\u043d\u043e\u0433\u043e \u0434\u043b\u044f \u0442\u043e\u0433\u043e, \u0447\u0442\u043e\u0431\u044b \u0434\u0435\u0440\u0436\u0430\u0442\u044c \u0432 \u043f\u0430\u043c\u044f\u0442\u0438 \u0432\u0441\u0435 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u044b. \u0412 \u044d\u0442\u043e\u043c \u0441\u043b\u0443\u0447\u0430\u0435 \u043c\u044b:  <\/p>\n<ul>\n<li>\u041f\u043e\u043b\u044c\u0437\u0443\u044f\u0441\u044c \u0442\u0435\u043c, \u0447\u0442\u043e \u0441\u043f\u0438\u0441\u043a\u0438 \u0432\u0441\u0442\u0440\u0435\u0447\u0430\u0435\u043c\u043e\u0441\u0442\u0438 \u043e\u0442\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u044b \u043f\u043e DT_WORDID, DT_OID,\u2026, \u0438\u043c\u0435\u0435\u043c \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e\u0441\u0442\u044c \u0434\u0451\u0448\u0435\u0432\u043e \u0437\u0430\u0433\u0440\u0443\u0436\u0430\u0442\u044c \u0434\u0430\u043d\u043d\u044b\u0435 \u043d\u0435 \u0446\u0435\u043b\u0438\u043a\u043e\u043c, \u0430 \u043b\u0438\u0448\u044c \u0432 \u0434\u0438\u0430\u043f\u0430\u0437\u043e\u043d\u0435 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432<\/li>\n<li>\u0441\u043e\u0440\u0442\u0438\u0440\u043e\u0432\u0430\u0442\u044c \u044d\u0442\u0438 \u0447\u0430\u0441\u0442\u0438\u0447\u043d\u044b\u0435 \u0434\u0430\u043d\u043d\u044b\u0435<\/li>\n<li>\u043f\u0435\u0440\u0435\u0441\u0435\u043a\u0430\u0442\u044c \u0441\u043f\u0438\u0441\u043a\u0438, \u043f\u043e\u043b\u0443\u0447\u0430\u044f \u0447\u0430\u0441\u0442\u0438\u0447\u043d\u044b\u0439 \u0440\u0435\u0437\u0443\u043b\u044c\u0442\u0430\u0442<\/li>\n<li>\u043f\u0440\u0438\u043d\u0438\u043c\u0430\u0442\u044c\u0441\u044f \u0437\u0430 \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0443\u044e \u043f\u0430\u0440\u0442\u0438\u044e<\/li>\n<\/ul>\n<p>\u041e\u0442\u043c\u0435\u0442\u0438\u043c \u043e\u0442\u0434\u0435\u043b\u044c\u043d\u043e, 
\u0447\u0442\u043e \u0435\u0441\u043b\u0438 \u043a\u0430\u043a\u043e\u0439-\u043b\u0438\u0431\u043e \u0441\u043b\u043e\u0432\u0430\u0440\u043d\u044b\u0439 \u043a\u0430\u043d\u0434\u0438\u0434\u0430\u0442 \u0432\u0441\u0442\u0440\u0435\u0447\u0430\u0435\u0442\u0441\u044f \u0432\u0435\u0437\u0434\u0435 \u043d\u0435\u043e\u0434\u043d\u043e\u043a\u0440\u0430\u0442\u043d\u043e, \u0432\u043e\u0437\u043c\u043e\u0436\u043d\u043e, \u044d\u0442\u043e \u043c\u0443\u0441\u043e\u0440\u043d\u043e\u0435 \u0441\u043b\u043e\u0432\u043e \u0438 \u0435\u0433\u043e \u0441\u043b\u0435\u0434\u0443\u0435\u0442 \u043f\u0440\u043e\u0441\u0442\u043e \u043f\u0440\u043e\u0438\u0433\u043d\u043e\u0440\u0438\u0440\u043e\u0432\u0430\u0442\u044c.<\/p>\n<p>  <b>\u0420\u0430\u043d\u0436\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u0435<\/b><br \/>  \u0411\u0443\u0434\u0435\u043c \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u0442\u044c \u0434\u043e\u0441\u0442\u0430\u0442\u043e\u0447\u043d\u043e \u043f\u0440\u0438\u043c\u0438\u0442\u0438\u0432\u043d\u0443\u044e \u0441\u0445\u0435\u043c\u0443 \u0440\u0430\u043d\u0436\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f:  <\/p>\n<ul>\n<li>\u0441\u043b\u043e\u0432\u0430\u0440\u043d\u0443\u044e \u0447\u0430\u0441\u0442\u044c \u043e\u0446\u0435\u043d\u043a\u0438(<i>SCORE<\/i>) \u043f\u043e\u043b\u0443\u0447\u0438\u043c, \u043f\u0435\u0440\u0435\u043c\u043d\u043e\u0436\u0430\u044f \u043d\u043e\u0440\u043c\u0438\u0440\u043e\u0432\u0430\u043d\u043d\u044b\u0435 \u043e\u0446\u0435\u043d\u043a\u0438 \u043f\u043e \u043e\u0442\u0434\u0435\u043b\u044c\u043d\u044b\u043c \u043f\u043e\u043f\u0430\u0434\u0430\u043d\u0438\u044f\u043c<\/li>\n<li>\u043f\u043e\u0437\u0438\u0446\u0438\u043e\u043d\u043d\u0443\u044e \u0447\u0430\u0441\u0442\u044c \u043e\u0446\u0435\u043d\u043a\u0438(<i>POS<\/i>) \u043f\u043e\u043b\u0443\u0447\u0438\u043c, \u0443\u0441\u0440\u0435\u0434\u043d\u044f\u044f \u0430\u0431\u0441\u043e\u043b\u044e\u0442\u043d\u044b\u0435 
\u0440\u0430\u0441\u0441\u0442\u043e\u044f\u043d\u0438\u044f \u043f\u043e\u0437\u0438\u0446\u0438\u0438 \u0442\u0435\u043a\u0443\u0449\u0435\u0433\u043e \u043f\u043e\u043f\u0430\u0434\u0430\u043d\u0438\u044f \u043e\u0442 \u043f\u043e\u0437\u0438\u0446\u0438\u0438 \u043f\u0440\u0435\u0434\u044b\u0434\u0443\u0449\u0435\u0433\u043e (\u0438\u043c\u0435\u0435\u0442\u0441\u044f \u0432 \u0432\u0438\u0434\u0443 \u043f\u043e\u0437\u0438\u0446\u0438\u044f \u043f\u043e\u043f\u0430\u0434\u0430\u043d\u0438\u044f \u0441\u043b\u043e\u0432\u0430 \u0432 \u0442\u0435\u043a\u0441\u0442\u0435 \u0437\u0430\u043f\u0438\u0441\u0438)<\/li>\n<li>\u0438\u0442\u043e\u0433\u043e\u0432\u0430\u044f \u043e\u0446\u0435\u043d\u043a\u0430 \u0440\u0430\u0432\u043d\u0430 <i>SCORE\/(1 + sqrt(POS\/5))<\/i><\/li>\n<\/ul>\n<p>  <b>\u041f\u043e\u0438\u0441\u043a \u0447\u0435\u0440\u0435\u0437 PL\/SQL<\/b><br \/>  \u0427\u0442\u043e\u0431\u044b \u043e\u0440\u0433\u0430\u043d\u0438\u0437\u043e\u0432\u0430\u0442\u044c \u043f\u043e\u0442\u043e\u043a \u043f\u0435\u0440\u0432\u0438\u0447\u043d\u044b\u0445 \u0438\u0434\u0435\u043d\u0442\u0438\u0444\u0438\u043a\u0430\u0442\u043e\u0440\u043e\u0432 \u0434\u043b\u044f \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0438\u044f \u043d\u0430\u0448\u0435\u0433\u043e \u043f\u043e\u0438\u0441\u043a\u0430 \u0432 \u0440\u0435\u0433\u0443\u043b\u044f\u0440\u043d\u043e\u043c SQL, \u043d\u0430\u043c \u043f\u043e\u0442\u0440\u0435\u0431\u0443\u0435\u0442\u0441\u044f \u0441\u043b\u0435\u0434\u0443\u044e\u0449\u0430\u044f \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u0430 \u0438 \u043f\u0440\u043e\u0446\u0435\u0434\u0443\u0440\u043d\u043e\u0435 view \u0434\u043b\u044f \u0434\u043e\u0441\u0442\u0443\u043f\u0430 \u043a \u043d\u0435\u0439:  <\/p>\n<pre><code class=\"sql\">create procedure MRC_QUERY_STRING_ALL ( \tin query varchar) {   declare vec any;   declare len integer;   result_names('oid','score');   vec := query_phrase(query);   if (vec &lt;&gt; 0 and 
vec is not null)  {     len := length(vec);     declare i integer;      i := 0;     while(i&lt;len) {       declare oid integer;       oid := vec[i];       result (oid, vec[i + 1]);       i := i + 2;     }   } }; create procedure view v_query_phrase_all as    MRC_QUERY_STRING_ALL(str)(oid integer, score integer); <\/code><\/pre>\n<p>  \u0417\u0430\u043f\u0440\u043e\u0441 \u0442\u0435\u043f\u0435\u0440\u044c \u043c\u043e\u0436\u0435\u0442 \u0432\u044b\u0433\u043b\u044f\u0434\u0435\u0442\u044c \u043a\u0430\u043a:<\/p>\n<pre><code class=\"sql\">select  a.TX_ID, a.TX_TS, b.score, a.TX_AUTHOR, a.TX_SUBJ,    coalesce (a.TX_STRING, blob_to_string(TX_LONG_STRING))       from MRC_TEXT as a,  v_query_phrase_all as b      where b.str = 'Posting Date' and a.TX_ID = b.oid; <\/code><\/pre>\n<p> \u0418\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u043d\u0430\u044f \u0444\u0443\u043d\u043a\u0446\u0438\u044f <i>query_phrase<\/i> \u044f\u0432\u043b\u044f\u0435\u0442\u0441\u044f C-\u0440\u0430\u0441\u0448\u0438\u0440\u0435\u043d\u0438\u0435\u043c \u0438 \u043e\u0441\u0443\u0449\u0435\u0441\u0442\u0432\u043b\u044f\u0435\u0442 \u0432\u0441\u044e \u0442\u0443 \u043d\u0438\u0437\u043a\u043e\u0443\u0440\u043e\u0432\u043d\u0435\u0432\u0443\u044e \u0434\u0435\u044f\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c, \u043e \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u0433\u043e\u0432\u043e\u0440\u0438\u043b\u043e\u0441\u044c \u0432\u044b\u0448\u0435.<\/p>\n<p>  <b>Benchmark<\/b><br \/>  i7-3612QM, Win64.<br \/>  \u0417\u0430\u043b\u0438\u0432\u043a\u0430 160 254 \u0437\u0430\u043f\u0438\u0441\u0435\u0439 \u0437\u0430\u043d\u0438\u043c\u0430\u0435\u0442 3 \u043c\u0438\u043d 2 \u0441\u0435\u043a \u0438\u043b\u0438 1.14 \u043c\u0441\u0435\u043a \u043d\u0430 \u0437\u0430\u043f\u0438\u0441\u044c.<br \/>  \u0414\u043b\u044f \u0442\u0435\u0441\u0442\u0438\u0440\u043e\u0432\u0430\u043d\u0438\u044f \u043f\u043e\u0438\u0441\u043a\u0430 \u0431\u0443\u0434\u0435\u043c 
\u0438\u0441\u043a\u0430\u0442\u044c \u043f\u0435\u0440\u0432\u044b\u0435 \u0434\u0432\u0430 \u0441\u043b\u043e\u0432\u0430 \u0432 \u043a\u0430\u0436\u0434\u043e\u0439 \u0437\u0430\u043f\u0438\u0441\u0438, \u0432\u0441\u0435\u0433\u043e 160 254 \u043f\u043e\u0438\u0441\u043a\u043e\u0432\u044b\u0445 \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432 \u0432 1, 2 \u0438 4 \u043f\u043e\u0442\u043e\u043a\u0430. \u0417\u0430\u043f\u0440\u0430\u0448\u0438\u0432\u0430\u0442\u044c \u0431\u0443\u0434\u0435\u043c \u0442\u043e\u043b\u044c\u043a\u043e \u0447\u0438\u0441\u043b\u043e \u043d\u0430\u0439\u0434\u0435\u043d\u043d\u044b\u0445 \u0437\u0430\u043f\u0438\u0441\u0435\u0439, \u0447\u0442\u043e\u0431\u044b \u043d\u0435 \u0443\u0447\u0438\u0442\u044b\u0432\u0430\u0442\u044c \u0432\u0440\u0435\u043c\u044f \u043d\u0430 \u043f\u043e\u0434\u044a\u0435\u043c \u0438 \u043f\u0435\u0440\u0435\u0434\u0430\u0447\u0443 \u0441\u0442\u0440\u043e\u043a. \u0412\u044b\u043f\u043e\u043b\u043d\u0435\u043d\u0438\u0435 \u0437\u0430\u043f\u0440\u043e\u0441\u043e\u0432 \u043e\u0441\u0443\u0449\u0435\u0441\u0442\u0432\u043b\u044f\u0435\u0442\u0441\u044f \u043f\u043e\u0441\u0440\u0435\u0434\u0441\u0442\u0432\u043e\u043c native ODBC \u0438\u043d\u0442\u0435\u0440\u0444\u0435\u0439\u0441\u0430, TCP\/IP \u0447\u0435\u0440\u0435\u0437 localhost.  
<\/p>\n<table>\n<tr>\n<th>N \u043f\u043e\u0442\u043e\u043a\u043e\u0432<\/th>\n<th>\u0421\u0443\u043c\u043c\u0430\u0440\u043d\u043e\u0435 \u0432\u0440\u0435\u043c\u044f<\/th>\n<th>1 \u0437\u0430\u043f\u0440\u043e\u0441<\/th>\n<\/tr>\n<tr>\n<td>1<\/td>\n<td>11&#8217;7&#187;<\/td>\n<td>4.16 \u043c\u0441\u0435\u043a<\/td>\n<\/tr>\n<tr>\n<td>2<\/td>\n<td>11&#8217;57&#187;<\/td>\n<td>4.47 \u043c\u0441\u0435\u043a<\/td>\n<\/tr>\n<tr>\n<td>4<\/td>\n<td>14&#8217;51<\/td>\n<td>5.61 \u043c\u0441\u0435\u043a<\/td>\n<\/tr>\n<\/table>\n<p><b><s>\u0412\u044b\u0432\u043e\u0434\u044b<\/s> \u041c\u043e\u0440\u0430\u043b\u044c<\/b><br \/>  \u0422\u0432\u043e\u0440\u0438, \u0432\u044b\u0434\u0443\u043c\u044b\u0432\u0430\u0439, \u043f\u0440\u043e\u0431\u0443\u0439! (\u0421)\u041c\u0430\u044f\u043a\u043e\u0432\u0441\u043a\u0438\u0439<\/p>\n<p>  <b>PS<\/b>:  <\/p>\n<div class=\"spoiler\"><b class=\"spoiler_title\">\u0422\u0435\u043a\u0441\u0442\u044b \u0421 \u0444\u0443\u043d\u043a\u0446\u0438\u0439, \u0432\u0434\u0440\u0443\u0433 \u043a\u043e\u043c\u0443 \u043f\u0440\u0438\u0433\u043e\u0434\u044f\u0442\u0441\u044f<\/b><\/p>\n<div class=\"spoiler_text\">#include &lt;stdio.h&gt;<br \/>  #include &lt;stdlib.h&gt;<br \/>  #include &lt;string.h&gt;<\/p>\n<p>  #include &lt;libutil.h&gt;<\/p>\n<p>  #ifdef WIN32<br \/>  #include &lt;crtdbg.h&gt;<br \/>  #endif<\/p>\n<p>  #include \u00absqlnode.h\u00bb<br \/>  #include \u00absqlbif.h\u00bb<br \/>  #include \u00abwi.h\u00bb<br \/>  #include \u00abDk.h\u00bb<\/p>\n<p>  #include &lt;math.h&gt;<br \/>  #include \u00abcaseutils.h\u00bb<br \/>  #include \u00ablist_sort.h\u00bb<br \/>  #include &lt;assert.h&gt;<\/p>\n<p>  \/\/#include &lt;ksrvext.h&gt;<\/p>\n<p>  static id_hash_t * ht_dict_ = NULL;<br \/>  static dk_hash_t * ht_dict_by_id_ = NULL;<br \/>  static id_hash_t * ht_triples_ = NULL;<br \/>  static dk_mutex_t * dict_mtx_ = NULL;<\/p>\n<p>  struct dict_item_s {<br \/>   char *word_;<br \/>   size_t id_;<br \/>   size_t count_;<br \/>   size_t 
attr_;<br \/>  };<br \/>  typedef struct dict_item_s dict_item_t;<\/p>\n<p>  struct triple_item_s {<br \/>   size_t wordid_;<br \/>   struct triple_item_s *next_;<br \/>  };<br \/>  typedef struct triple_item_s triple_item_t;<\/p>\n<p>  struct triple_head_s {<br \/>   lenmem_t lm_;<br \/>   wchar_t data_[4];<br \/>   triple_item_t *list_;<br \/>  };<br \/>  typedef struct triple_head_s triple_head_t;<\/p>\n<p>  const wchar_t seps_[] = L&quot; ,.\\t\\r\\n&#8217;\\&quot;=*!%^:;~`&lt;&gt;+|?&quot;;<br \/>  const wchar_t glues_[] = L&quot;()&#038;@#$:{}\/\\\\-[]_&quot;;<\/p>\n<p>  size_t next_wordid (caddr_t * qst)<br \/>  {<br \/>   size_t _id = 0;<br \/>   query_instance_t *q = (query_instance_t *)qst;<br \/>   client_connection_t *cli = q-&gt;qi_client;<br \/>   query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   char buf[1024];<br \/>   sprintf (buf, \u00abselect sequence_next (&#8216;MRC_WORD_ID&#8217;)\u00bb);<br \/>   if (NULL == (stmt = sql_compile (buf, cli, err, 0)))<br \/>   goto end;<\/p>\n<p>  if (NULL != (*err =<br \/>   qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL,<br \/>   0)))<br \/>   goto end;<\/p>\n<p>  end:<br \/>   if (lc)<br \/>   {<br \/>   lc_next (lc);<br \/>   _id = (size_t)unbox (lc_nth_col (lc, 0));<br \/>   }<br \/>   if (lc)<br \/>   {<br \/>   lc_free (lc);<br \/>   lc = NULL;<br \/>   }<br \/>   if (stmt)<br \/>   {<br \/>   qr_free (stmt);<br \/>   stmt = NULL;<br \/>   }<br \/>   return _id;<br \/>  }<\/p>\n<p>  size_t store_triple (caddr_t * qst, size_t wordid, const wchar_t *triple)<br \/>  {<br \/>   query_instance_t *q = (query_instance_t *)qst;<br \/>   client_connection_t *cli = q-&gt;qi_client;<br \/>   query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   char buf[1024];<br \/>   wchar_t wlt[4];<br \/>   wcsncpy(wlt, triple, 3);<br \/>   wlt[3] = 
0;<\/p>\n<p>  sprintf (buf, &quot;&#8212;utf8_execs=yes\\n insert into MRC_TRIPLES (TR_DATA, TR_WORDID) values(?,?)&quot;);<br \/>   if (NULL == (stmt = sql_compile (buf, cli, err, 0)))<br \/>   goto end;<\/p>\n<p>  if (NULL != (*err =<br \/>   qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL, 2,<br \/>   &quot;:0&quot;, box_wide_string (wlt), QRP_RAW,<br \/>   &quot;:1&quot;, box_num(wordid), QRP_RAW<br \/>   )))<br \/>   goto end;<br \/>   {<br \/>   char**place = NULL;<\/p>\n<p>  triple_item_t *pitem = (triple_item_t *)dk_alloc_box_zero(sizeof(triple_item_t), DV_BIN);<br \/>   triple_head_t *phead = (triple_head_t *)dk_alloc_box_zero(sizeof(triple_head_t), DV_BIN);<\/p>\n<p>  phead-&gt;lm_.lm_length = sizeof(phead-&gt;data_);<br \/>   phead-&gt;lm_.lm_memblock = (char*)phead-&gt;data_;<br \/>   memcpy(phead-&gt;data_, wlt, sizeof(phead-&gt;data_));<\/p>\n<p>  pitem-&gt;wordid_ = wordid;<\/p>\n<p>  place = (char **) id_hash_get (ht_triples_, (caddr_t) &#038;phead-&gt;lm_);<br \/>   if (place)<br \/>   {<br \/>   triple_head_t *ohead = *(triple_head_t**)place;<br \/>   pitem-&gt;next_ = ohead-&gt;list_;<br \/>   ohead-&gt;list_ = pitem;<br \/>   }<br \/>   else<br \/>   {<br \/>   id_hash_set (ht_triples_, (caddr_t)(&#038;phead-&gt;lm_), (caddr_t)&#038;phead);<br \/>   pitem-&gt;next_ = pitem;<br \/>   }<br \/>   }<\/p>\n<p>  end:<br \/>   if (lc)<br \/>   {<br \/>   lc_free (lc);<br \/>   lc = NULL;<br \/>   }<br \/>   if (stmt)<br \/>   {<br \/>   qr_free (stmt);<br \/>   stmt = NULL;<br \/>   }<br \/>   return 0;<br \/>  }<\/p>\n<p>  wchar_t **<br \/>  nv_split (wchar_t *tmp)<br \/>  {<br \/>   wchar_t **arr = NULL;<br \/>   size_t len = wcslen (tmp);<br \/>   size_t ix = 0;<br \/>   size_t i;<br \/>   size_t cnt = 0;<br \/>   for (i = 0; i &lt; len; i++)<br \/>   {<br \/>   if (NULL != wcschr (seps_, tmp[i]))<br \/>   {<br \/>   tmp[ix++] = 0;<br \/>   cnt++;<br \/>   }<br \/>   else <br \/>   {<br \/>   if (NULL == wcschr (glues_, tmp[i]))<br 
\/>   tmp[ix++] = mrc_toupper (tmp[i]);<br \/>   }<br \/>   } <br \/>   tmp[ix] = 0;<br \/>   cnt = 0;<br \/>   for (i = 0; i &lt; len; i++)<br \/>   {<br \/>   if (tmp[i])<br \/>   {<br \/>   cnt++;<br \/>   while (i &lt; len &#038;&#038; 0 != tmp[++i]);<br \/>   }<br \/>   } <br \/>   if (cnt)<br \/>   {<br \/>   \/* And allocate a vector of once or twice of that many elements. *\/<br \/>   arr = dk_alloc_box ((cnt * sizeof (caddr_t)), DV_ARRAY_OF_POINTER);<\/p>\n<p>  ix = 0;<br \/>   for (i = 0; i &lt; len; i++)<br \/>   {<br \/>   if (0 != tmp[i])<br \/>   {<br \/>   int loclen = wcslen(tmp+i);<br \/>   ((caddr_t *) arr)[ix] = ((char *) dk_alloc_box_zero ((loclen + 1)*sizeof(wchar_t), DV_LONG_WIDE));<br \/>   memcpy (((caddr_t *) arr)[ix++], tmp + i, (loclen + 1)*sizeof(wchar_t));<br \/>   while (i &lt; len &#038;&#038; 0 != tmp[++i]);<br \/>   }<br \/>   } <br \/>   }<br \/>   return arr;<br \/>  }<\/p>\n<p>  caddr_t<br \/>  bif_nv_split (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   char *me = \u00abnv_split\u00bb;<br \/>   caddr_t arr = NULL;<br \/>   caddr_t arg = bif_arg_unrdf (qst, args, 0, me);<br \/>   dtp_t dtp = DV_TYPE_OF (arg);<br \/>   if (DV_DB_NULL == dtp || NULL == arg)<br \/>   {<br \/>   return (NULL);<br \/>   }<br \/>   if (IS_STRING_DTP(dtp))<br \/>   {<br \/>   wchar_t *wide = box_utf8_as_wide_char (arg, NULL, strlen(arg), 0, DV_WIDE);<br \/>   arr = (caddr_t)nv_split (wide);<br \/>   dk_free_box(wide);<br \/>   return arr;<br \/>   }<br \/>   if (IS_WIDE_STRING_DTP (dtp))<br \/>   {<br \/>   wchar_t *tmp = wcsdup ((const wchar_t *)arg);<br \/>   arr = (caddr_t)nv_split (tmp);<br \/>   free(tmp);<br \/>   return arr;<br \/>   }<\/p>\n<p>   {<br \/>   sqlr_new_error (\u00ab22023\u00bb, \u00abSR007\u00bb,<br \/>   \u00abFunction %s needs a nvstring or NULL as argument, \u201e<br \/>   \u201cnot an arg of type %s (%d)\u00bb,<br \/>   me, 1, dv_type_title (dtp), dtp);<br \/>   }<br \/>   return arr;<br \/>  
}<\/p>\n<p>  caddr_t<br \/>  bif_treat_nword (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   char *me = \u00abtreat_nword\u00bb;<br \/>   caddr_t arg = bif_arg_unrdf (qst, args, 0, me);<br \/>   int len;<br \/>   const wchar_t *wide = (const wchar_t *)arg;<br \/>   const wchar_t *newwide = NULL;<br \/>   wchar_t *tmpbuf;<br \/>   size_t wordid = 0;<br \/>   dtp_t dtp = DV_TYPE_OF (arg);<br \/>   if (DV_DB_NULL == dtp)<br \/>   {<br \/>   return (NULL);<br \/>   }<br \/>   if (!IS_WIDE_STRING_DTP (dtp))<br \/>   {<br \/>   sqlr_new_error (\u00ab22023\u00bb, \u00abSR007\u00bb,<br \/>   \u00abFunction %s needs a nvstring or NULL as argument, \u201e<br \/>   \u201cnot an arg of type %s (%d)\u00bb,<br \/>   me, 1, dv_type_title (dtp), dtp);<br \/>   }<br \/>   len = wcslen(wide);<br \/>   tmpbuf = (wchar_t *)_alloca (sizeof (wchar_t) * (len + 3));<br \/>   tmpbuf[0] = L&#8217; &#8216;;<br \/>   wcscpy(tmpbuf + 1, wide);<br \/>   tmpbuf[len+1] = L&#8217; &#8216;;<br \/>   tmpbuf[len+2] = 0;<br \/>   newwide = tmpbuf;<\/p>\n<p>  mutex_enter (dict_mtx_);<br \/>   {<br \/>   char*utf8 = box_wide_as_utf8_char ((const char*)wide, len, DV_LONG_STRING);<br \/>   char**place = NULL;<\/p>\n<p>  place = (char **) id_hash_get (ht_dict_, (caddr_t) &#038;utf8);<br \/>   if (place)<br \/>   {<br \/>   dict_item_t *pitem = *(dict_item_t **)place;<br \/>   pitem-&gt;count_++;<br \/>   dk_free_box(utf8);<br \/>   wordid = pitem-&gt;id_;<br \/>   }<br \/>   else<br \/>   {<br \/>   query_instance_t *q = (query_instance_t *)qst;<br \/>   client_connection_t *cli = q-&gt;qi_client;<br \/>   query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   char buf[1024];<\/p>\n<p>  dict_item_t *pitem = dk_alloc_box_zero (sizeof(dict_item_t), DV_BIN);<br \/>   pitem-&gt;word_ = utf8;<br \/>   pitem-&gt;count_ = 1;<br \/>   wordid = next_wordid(qst);<br \/>   pitem-&gt;id_ = wordid;<br \/>   id_hash_set 
(ht_dict_, (caddr_t) &#038;utf8, (caddr_t) &#038;pitem);<br \/>   sethash ((void *)wordid, ht_dict_by_id_, (void*)pitem);<\/p>\n<p>  sprintf (buf, &quot;&#8212;utf8_execs=yes\\n insert into MRC_WORDS(WD_ITSELF, WD_ID) values (?, ?)&quot;);<br \/>   \/\/cast(charset_recode (&#8216;%s&#8217;, &#8216;UTF-8&#8217;, &#8216;_WIDE_&#8217;) as nvarchar))&quot;, wordid, utf8); <br \/>   if (NULL != (stmt = sql_compile (buf, cli, err, 0)))<br \/>   {<br \/>   *err =<br \/>   qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL, 2,<br \/>   &quot;:0&quot;, box_wide_string (newwide), QRP_RAW,<br \/>   &quot;:1&quot;, box_num(wordid), QRP_RAW<br \/>   );<br \/>   \/\/*err = qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL, 0);<br \/>   }<br \/>   if (lc)<br \/>   lc_free (lc);<br \/>   if (stmt)<br \/>   qr_free (stmt);<\/p>\n<p>  {<br \/>   int len = wcslen(newwide);<br \/>   int i;<br \/>   for (i = 0; i &lt; len-2; i++)<br \/>   {<br \/>   store_triple (qst, wordid, newwide + i);<br \/>   }<br \/>   }<br \/>   }<br \/>   }<br \/>   mutex_leave (dict_mtx_);<br \/>   return box_num(wordid);<br \/>  }<\/p>\n<p>  int64<br \/>  box2long (caddr_t arg)<br \/>  {<br \/>   dtp_t dtp = DV_TYPE_OF (arg);<br \/>   if (dtp == DV_SHORT_INT || dtp == DV_LONG_INT)<br \/>   return (int64)(unbox (arg));<br \/>   else if (dtp == DV_SINGLE_FLOAT)<br \/>   return (int64)(unbox_float (arg));<br \/>   else if (dtp == DV_DOUBLE_FLOAT)<br \/>   return (int64)(unbox_double (arg));<br \/>   else if (dtp == DV_NUMERIC)<br \/>   {<br \/>   int64 dt;<br \/>   numeric_to_int64 ((numeric_t) arg, &#038;dt);<br \/>   return dt;<br \/>   }<br \/>   else if (dtp == DV_DB_NULL)<br \/>   return (int64)(0);<br \/>   assert (0);<br \/>   return 0;<br \/>  }<\/p>\n<p>  void flush_dict()<br \/>  {<br \/>   char **key = NULL;<br \/>   char **val = NULL;<br \/>   id_hash_iterator_t hit;<br \/>   id_hash_iterator (&#038;hit, ht_dict_);<br \/>   while (hit_next (&#038;hit, (caddr_t*) 
&#038;key, (caddr_t*) &#038;val))<br \/>   {<br \/>   dk_free_box(*key);<br \/>   dk_free_box(*val);<br \/>   }<br \/>   id_hash_clear(ht_dict_);<br \/>  }<\/p>\n<p>  void flush_triples()<br \/>  {<br \/>   char **key = NULL;<br \/>   char **val = NULL;<br \/>   id_hash_iterator_t hit;<br \/>   id_hash_iterator (&#038;hit, ht_triples_);<br \/>   while (hit_next (&#038;hit, (caddr_t*) &#038;key, (caddr_t*) &#038;val))<br \/>   {<br \/>   triple_head_t *phead = *(triple_head_t**)val;<br \/>   triple_item_t *pit = phead-&gt;list_;<\/p>\n<p>  while (pit)<br \/>   {<br \/>   triple_item_t *tmp = pit-&gt;next_;<br \/>   dk_free_box (pit);<br \/>   pit = tmp;<br \/>   }<br \/>   dk_free_box(*val);<br \/>   }<br \/>   id_hash_clear(ht_triples_);<br \/>  }<\/p>\n<p>  size_t reload_triples (query_instance_t *qst)<br \/>  {<br \/>   client_connection_t *cli = qst-&gt;qi_client;<br \/>   query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   char buf[1024];<\/p>\n<p>  flush_triples ();<\/p>\n<p>  sprintf (buf, \u00abselect TR_DATA, TR_WORDID from MRC_TRIPLES\u00bb);<br \/>   if (NULL != (stmt = sql_compile (buf, cli, err, 0)))<br \/>   {<br \/>   *err = qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL, 0);<br \/>   if (lc)<br \/>   {<br \/>   int64 id = 0;<br \/>   caddr_t tmp = 0;<br \/>   int64 cnt = 0;<br \/>   char*utf8 = NULL;<br \/>   char**place = NULL;<br \/>   lenmem_t lm;<br \/>   triple_head_t *phead = NULL;<br \/>   triple_head_t *ohead = NULL;<br \/>   triple_item_t *pitem = NULL;<\/p>\n<p>  while (lc_next (lc))<br \/>   {<br \/>   if (lc-&gt;lc_error)<br \/>   {<br \/>   *err = box_copy_tree (lc-&gt;lc_error);<br \/>   break;<br \/>   }<br \/>   id = box2long (lc_nth_col (lc, 1));<br \/>   tmp = lc_nth_col (lc, 0);<\/p>\n<p>  pitem = (triple_item_t *)dk_alloc_box_zero(sizeof(triple_item_t), DV_BIN);<br \/>   pitem-&gt;wordid_ = (size_t)id;<\/p>\n<p>  lm.lm_length = sizeof 
(phead-&gt;data_);<br \/>   lm.lm_memblock = (caddr_t)tmp;<\/p>\n<p>  place = (char **) id_hash_get (ht_triples_, (caddr_t) &#038;lm);<br \/>   if (place)<br \/>   {<br \/>   ohead = *(triple_head_t **)place;<br \/>   pitem-&gt;next_ = ohead-&gt;list_;<br \/>   ohead-&gt;list_ = pitem;<br \/>   }<br \/>   else<br \/>   {<br \/>   phead = (triple_head_t *)dk_alloc_box_zero(sizeof(triple_head_t), DV_BIN);<br \/>   phead-&gt;list_ = pitem;<br \/>   phead-&gt;lm_.lm_length = sizeof (phead-&gt;data_);<br \/>   phead-&gt;lm_.lm_memblock = (caddr_t)phead-&gt;data_;<br \/>   memcpy(phead-&gt;data_, tmp, sizeof (phead-&gt;data_));<\/p>\n<p>  pitem-&gt;next_ = NULL;<br \/>   id_hash_set (ht_triples_, (caddr_t) &#038;phead-&gt;lm_, (caddr_t) &#038;phead);<br \/>   }<br \/>   }<br \/>   }<br \/>   }<br \/>   if (lc)<br \/>   lc_free (lc);<br \/>   if (stmt)<br \/>   qr_free (stmt);<\/p>\n<p>  return 0;<br \/>  }<\/p>\n<p>  caddr_t<br \/>  bif_reload_dict (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   query_instance_t *q = (query_instance_t *)qst;<br \/>   client_connection_t *cli = q-&gt;qi_client;<br \/>   query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   char buf[1024];<\/p>\n<p>  mutex_enter (dict_mtx_);<br \/>   flush_dict();<\/p>\n<p>  sprintf (buf, \u00abselect WD_ID, WD_ITSELF, WD_COUNT from MRC_WORDS\u00bb);<br \/>   if (NULL != (stmt = sql_compile (buf, cli, err, 0)))<br \/>   {<br \/>   *err = qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) qst, NULL, 0);<br \/>   if (lc)<br \/>   {<br \/>   int64 id = 0;<br \/>   caddr_t tmp = 0;<br \/>   int64 cnt = 0;<br \/>   char*utf8 = NULL;<br \/>   char**place = NULL;<br \/>   size_t maxid = 0;<\/p>\n<p>  while (lc_next (lc))<br \/>   {<br \/>   if (lc-&gt;lc_error)<br \/>   {<br \/>   *err = box_copy_tree (lc-&gt;lc_error);<br \/>   break;<br \/>   }<br \/>   id = box2long (lc_nth_col (lc, 0));<br \/>   tmp = 
lc_nth_col (lc, 1);<br \/>   cnt = box2long (lc_nth_col (lc, 2));<br \/>   utf8 = box_wide_as_utf8_char (tmp, box_length (tmp) \/ sizeof (wchar_t) \u2014 1, DV_LONG_STRING);<\/p>\n<p>  place = (char **) id_hash_get (ht_dict_, (caddr_t) &#038;utf8);<br \/>   if (place)<br \/>   {<br \/>   assert(0);<br \/>   }<br \/>   else<br \/>   {<br \/>   dict_item_t *pitem = dk_alloc_box_zero (sizeof(dict_item_t), DV_BIN);<br \/>   pitem-&gt;word_ = utf8;<br \/>   pitem-&gt;count_ = 1;<br \/>   pitem-&gt;id_ = (size_t)id;<br \/>   if (maxid &lt; id)<br \/>   maxid = id;<br \/>   id_hash_set (ht_dict_, (caddr_t) &#038;utf8, (caddr_t) &#038;pitem);<br \/>   sethash ((void *)id, ht_dict_by_id_, (void*)pitem);<br \/>   }<br \/>   }<br \/>   }<br \/>   }<br \/>   if (lc)<br \/>   lc_free (lc);<br \/>   if (stmt)<br \/>   qr_free (stmt);<\/p>\n<p>  reload_triples(q);<\/p>\n<p>  mutex_leave (dict_mtx_);<br \/>   return 0;<br \/>  }<\/p>\n<p>  \/\/&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;<br \/>  \/\/ l_dist_raw()<br \/>  \/\/ static\/local function!!!<br \/>  \/\/<br \/>  \/\/ Purpose: Calculates the L Distance for the two strings (words).<br \/>  \/\/<br \/>  \/\/ Inputs: char *str1, *str2 \u2014 input strings (words) to compair<br \/>  \/\/ int len1,len2 \u2014 the shorter of the length of str1 amd str2<br \/>  \/\/ respectively or MAX_LDIST_LEN.<br \/>  \/\/ NOTE! 
No error checking is done.<br \/>  \/\/ Array overflow on the stack will result<br \/>  \/\/ if either is out of range.<br \/>  \/\/ Outputs: none<br \/>  \/\/<br \/>  \/\/ Returns: L Distance value is returned<br \/>  \/\/<br \/>  \/\/ Note, there are two defines immediately after this comment header that<br \/>  \/\/ are only used by this function.<br \/>  \/\/<br \/>  \/\/ (values in all CAPS are defined in the LDIST.H header file)<br \/>  \/\/<br \/>  \/\/&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;&#8212;<br \/>  #define MAX_LDIST_LEN 40 \/\/ max word len to compair<br \/>  #define MIN3(a,b,c) (a &lt; b? \\<br \/>   (a &lt; c? a: c): \\<br \/>   (b &lt; c? b: c))<\/p>\n<p>  int <br \/>  l_dist_raw(const wchar_t *str1, const wchar_t *str2, int len1, int len2)<br \/>  {<br \/>   int arr1[MAX_LDIST_LEN+1];<br \/>   int arr2[MAX_LDIST_LEN+1];<br \/>   int i, j;<br \/>   if (len1 &gt; MAX_LDIST_LEN)<br \/>   len1 = MAX_LDIST_LEN;<br \/>   if (len2 &gt; MAX_LDIST_LEN)<br \/>   len2 = MAX_LDIST_LEN;<br \/>   for (i = 0; i &lt;= len2; i++)<br \/>   arr1[i] = i;<\/p>\n<p>  for (i = 1; i &lt;= len1; i++)<br \/>   {<br \/>   arr2[0] = i;<br \/>   for (j = 1; j &lt;= len2; j++)<br \/>   {<br \/>   int score = (str1[i-1] == str2[j-1])?0:1;<br \/>   int i1 = arr2[j-1]+1;<br \/>   int i2 = arr1[j]+1;<br \/>   int i3 = arr1[j-1] + score;<br \/>   arr2[j] = MIN3 (i1, i2, i3);\/\/arr2[j-1]+1, arr1[j]+1, arr1[j] + score);<br \/>   \/\/d[(j-1)*n+i]+1, d[j*n+i-1]+1, d[(j-1)*n+i-1]+cost);<br \/>   }<br \/>   memcpy (arr1, arr2, sizeof (int)*(len2+1));<br \/>   }<br \/>   return arr2[len2];<br \/>  }<\/p>\n<p>  struct ipair_s {<br \/>   ptrlong id_;<br \/>   ptrlong len_;<br \/>   ptrlong pos_;<br \/>   ptrlong score_;<br \/>   };<br \/>   typedef struct ipair_s ipair_t;<\/p>\n<p>  int <br \/>  cmp_pairs (const void *a,const void *b)<br \/>  {<br \/>   const 
ipair_t *pa = *(const ipair_t **)a;<br \/>   const ipair_t *pb = *(const ipair_t **)b;<br \/>   if (pb-&gt;id_ == pa-&gt;id_)<br \/>   return pa-&gt;score_ \u2014 pb-&gt;score_;<br \/>   return pa-&gt;id_ \u2014 pb-&gt;id_;<br \/>  }<\/p>\n<p>  int compare_by_id(const void *a, const void *b, const void *arg)<br \/>  {<br \/>   ipair_t *pa = (ipair_t*)a;<br \/>   ipair_t *pb = (ipair_t*)b;<br \/>   return pa-&gt;id_ \u2014 pb-&gt;id_;<br \/>  }<\/p>\n<p>  int compare_by_score(const void *a, const void *b, const void *arg)<br \/>  {<br \/>   ipair_t *pa = (ipair_t*)a;<br \/>   ipair_t *pb = (ipair_t*)b;<br \/>   return pb-&gt;score_ \u2014 pa-&gt;score_;<br \/>  }<\/p>\n<p>  dk_set_t<br \/>  load_oid_list (ipair_t **words, query_instance_t *q, mem_pool_t * mp)<br \/>  {<br \/>   client_connection_t *cli = q-&gt;qi_client;<br \/>   \/*static*\/ query_t *stmt = NULL;<br \/>   local_cursor_t *lc = NULL;<br \/>   caddr_t lerr = NULL;<br \/>   caddr_t * err = &lerr;<br \/>   dk_set_t out_list = NULL;<br \/>   char buf[1024];<br \/>   dk_set_t pairs_list = NULL;<br \/>   ipair_t *item = NULL;<br \/>   size_t i = 0;<br \/>   size_t len = box_length(words)\/sizeof(ipair_t*);<br \/>   size_t cnt = 0;<\/p>\n<p>  if (NULL == stmt)<br \/>   {<br \/>   sprintf (buf, \u00abselect DT_OID, DT_POSITION from MRC_DATA where DT_WORDID =? 
\u201e);<br \/>   if (NULL == (stmt = sql_compile_static (buf, \/*bootstrap_*\/cli, err, 0)))<br \/>   return NULL;<br \/>   }<br \/>   for (i = 0; i&lt; len; i++)<br \/>   {<br \/>   size_t id;<br \/>   ipair_t *pair = words[i];<br \/>  \/\/printf(\u201c\\n&#8212;%d &#8212;&#8212;\\n\u00bb,pair-&gt;id_);<br \/>   *err = qr_rec_exec (stmt, cli, &#038;lc, (query_instance_t *) q, NULL, 1,<br \/>   &quot;:0&quot;, box_num (pair-&gt;id_), QRP_RAW);<br \/>   if (NULL == lc)<br \/>   continue;<\/p>\n<p>  while (lc_next (lc))<br \/>   {<br \/>   if (lc-&gt;lc_error)<br \/>   {<br \/>   *err = box_copy_tree (lc-&gt;lc_error);<br \/>   break;<br \/>   }<br \/>   id = box2long (lc_nth_col (lc, 0));<br \/>   item = (ipair_t*)mp_alloc_box(mp, sizeof(ipair_t), DV_ARRAY_OF_LONG);<br \/>   item-&gt;id_ = id;<br \/>   item-&gt;len_ = pair-&gt;len_;<br \/>   item-&gt;pos_ = box2long (lc_nth_col (lc, 1));<br \/>   item-&gt;score_ = pair-&gt;score_;<br \/>   mp_set_push (mp, &#038;pairs_list, item);<br \/>   cnt++;<br \/>  \/\/ printf(&quot;%d &quot;, id);<br \/>   }<br \/>   if (lc)<br \/>   lc_free (lc);<br \/>   }<br \/>   if (stmt)<br \/>   qr_free (stmt);<\/p>\n<p>  \/\/if (stmt)<br \/>   \/\/ qr_free (stmt);<br \/>   \/\/printf(&quot;+%d+&quot;, cnt);<br \/>   return list_sort (pairs_list, compare_by_id, NULL);<br \/>  }<\/p>\n<p>  ipair_t **<br \/>  get_word_candidates (wchar_t *arg)<br \/>  {<br \/>   ipair_t **res = NULL;<br \/>   dk_set_t ids_list = NULL;<br \/>   caddr_t arr = NULL;<\/p>\n<p>  {<br \/>   dk_hash_t *ht_ids = NULL;<br \/>   int maxcount = 1;<br \/>   size_t i;<\/p>\n<p>  wchar_t *word = (wchar_t*)arg;<br \/>   wchar_t *pbuf = NULL;<br \/>   size_t isnum = ((*word) &gt;= L&#8217;0&#8242; &#038;&#038; (*word) &lt;= L&#8217;9&#8242;);<br \/>   size_t len = wcslen (word);<br \/>   \/\/int slen = len;<br \/>   if (len &lt; 3 &#038;&#038; !isnum)<br \/>   {<br \/>   return NULL;<br \/>   }<br \/>   word = (wchar_t*)box_copy (word);<br \/>   mrc_toupper_str 
(word);<\/p>\n<p>  pbuf = (wchar_t *)_alloca (sizeof (wchar_t) * (len + 3));<br \/>   pbuf[0] = L&#8217; &#8216;;<br \/>   wcscpy (pbuf + 1, word);<br \/>   pbuf[len + 1] = L&#8217; &#8216;;<br \/>   pbuf[len + 2] = L&#8217;\\0&#8242;;<\/p>\n<p>  ht_ids = hash_table_allocate (101);<\/p>\n<p>  mutex_enter (dict_mtx_);<br \/>   for (i = 0; i &lt; (len); i++)<br \/>   {<br \/>   char**place = NULL;<br \/>   lenmem_t lm;<br \/>   triple_head_t *phead = NULL;<br \/>   triple_item_t *pitem = NULL;<br \/>   wchar_t trbuf[4];<br \/>   trbuf[0] = mrc_toupper(pbuf[i]);<br \/>   trbuf[1] = mrc_toupper(pbuf[i + 1]);<br \/>   trbuf[2] = mrc_toupper(pbuf[i + 2]);<br \/>   trbuf[3] = L&#8217;\\0&#8242;;<\/p>\n<p>  lm.lm_length = sizeof (phead-&gt;data_);<br \/>   lm.lm_memblock = (caddr_t)trbuf;<\/p>\n<p>  place = (char **) id_hash_get (ht_triples_, (caddr_t) &#038;lm);<br \/>   if (place)<br \/>   {<br \/>   phead = *(triple_head_t**)place;<br \/>   pitem = phead-&gt;list_;<br \/>   while(pitem)<br \/>   {<br \/>   int wordid = pitem-&gt;wordid_;<\/p>\n<p>  int ptr = (int)gethash ((void *)wordid, ht_ids);<br \/>   if (0 == ptr)<br \/>   sethash ((void *)wordid, ht_ids, (void*)1);<br \/>   else<br \/>   {<br \/>   sethash ((void *)wordid, ht_ids, (void*)(++ptr));<br \/>   if (ptr &gt; maxcount)<br \/>   maxcount = ptr;<br \/>   }<\/p>\n<p>  pitem = pitem-&gt;next_;<br \/>   }<br \/>   }<br \/>   }<br \/>   mutex_leave (dict_mtx_);<\/p>\n<p>  {<br \/>   dk_set_t pairs_list = NULL;<br \/>   int nids = 0;<br \/>   int mx = maxcount;<br \/>   int nallids = ht_ids-&gt;ht_count;<br \/>   void *key, *val;<br \/>   dk_hash_iterator_t hit;<\/p>\n<p>  maxcount = (maxcount + 1)\/2;<br \/>   if (maxcount &gt;= len)<br \/>   maxcount = len \u2014 1;<br \/>   for (dk_hash_iterator (&#038;hit, ht_ids);<br \/>   dk_hit_next (&#038;hit, (void**) &#038;key, (void**) &#038;val);<br \/>   \/* *\/)<br \/>   {<br \/>   int wordid = (int)key;<br \/>   int cnt = (int)val;<br \/>   if (cnt &gt;= 
maxcount)<br \/>   {<br \/>   dict_item_t *pptr = (dict_item_t *)gethash ((void *)wordid, ht_dict_by_id_);<br \/>   if(pptr)<br \/>   {<br \/>   ipair_t *item = NULL;<br \/>   wchar_t buf[128];<br \/>   size_t lbuf, dist, score;<br \/>   box_utf8_as_wide_char ((caddr_t)pptr-&gt;word_, (caddr_t)buf, strlen(pptr-&gt;word_), 127, DV_WIDE);<br \/>   lbuf = wcslen(buf);<br \/>   dist = l_dist_raw(word, buf, len, lbuf);<br \/>   score = 100 \u2014 (dist * 100)\/((len &gt; lbuf)? len: lbuf);<br \/>   \/\/score = 100 \u2014 (dist * 200)\/(len + lbuf);<br \/>   if (word[0] != buf[0])<br \/>   score = (score * 3)&gt;&gt;2;<br \/>   \/\/score = 100 \u2014 (dist * 100)\/((len &gt; lbuf)? len: lbuf);<br \/>   \/\/wprintf (L&quot;%s -&gt; %s (%d)\\n&quot;, word, buf, score);<br \/>   item = (ipair_t*)dk_alloc_box(sizeof(ipair_t), DV_ARRAY_OF_LONG);<br \/>   item-&gt;id_ = wordid;<br \/>   item-&gt;len_ = lbuf;<br \/>   item-&gt;score_ = score;<br \/>   dk_set_push (&#038;pairs_list, item);<br \/>   nids++;<br \/>   }<br \/>   assert(pptr);<br \/>   }<br \/>   }<br \/>   if (pairs_list)<br \/>   {<br \/>   res = (ipair_t**)dk_set_to_array (pairs_list);<br \/>   dk_set_free (pairs_list);<br \/>   assert(nids == box_length(res)\/sizeof(void*));<br \/>   qsort (res, nids, sizeof (void*), cmp_pairs);<br \/>   }<br \/>   }<br \/>   hash_table_free (ht_ids);<br \/>   dk_free_box(word);<br \/>   }<br \/>   return res;<br \/>  }<\/p>\n<p>  caddr_t<br \/>  bif_get_word_candidates (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   char *me = \u00abget_word_candidates\u00bb;<br \/>   ipair_t **res = NULL;<br \/>   dk_set_t ids_list = NULL;<br \/>   caddr_t arr = NULL;<br \/>   caddr_t arg = bif_arg_unrdf (qst, args, 0, me);<br \/>   dtp_t dtp = DV_TYPE_OF (arg);<br \/>   if (DV_DB_NULL == dtp)<br \/>   {<br \/>   return (NULL);<br \/>   }<br \/>   if (!IS_WIDE_STRING_DTP (dtp))<br \/>   {<br \/>   sqlr_new_error (\u00ab22023\u00bb, \u00abSR007\u00bb,<br \/>   
&quot;Function %s needs a nvstring or NULL as argument, &quot;<br \/>   &quot;not an arg of type %s (%d)&quot;,<br \/>   me, 1, dv_type_title (dtp), dtp);<br \/>   }<\/p>\n<p>  if (0 == ht_dict_-&gt;ht_count)<br \/>   {<br \/>   bif_reload_dict (qst, err_ret, args);<br \/>   }<\/p>\n<p>  res = get_word_candidates ((wchar_t*)arg);<\/p>\n<p>  ids_list = load_oid_list (res, (query_instance_t *)qst, NULL);<br \/>   DO_SET (ipair_t *, item, &#038;ids_list)<br \/>   {<br \/>   \/\/printf (&quot;%d &quot;, item-&gt;id_);<br \/>   dk_free_box(item);<br \/>   }<br \/>   END_DO_SET ();<\/p>\n<p>   return (caddr_t)res;<br \/>  }<\/p>\n<p>  caddr_t<br \/>  bif_calc_similarity (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   char *me = &quot;calc_similarity&quot;;<br \/>   caddr_t arg1 = bif_arg_unrdf (qst, args, 0, me);<br \/>   caddr_t arg2 = bif_arg_unrdf (qst, args, 1, me);<br \/>   dtp_t dtp1 = DV_TYPE_OF (arg1);<br \/>   dtp_t dtp2 = DV_TYPE_OF (arg2);<br \/>   if (DV_DB_NULL == dtp1 || DV_DB_NULL == dtp2)<br \/>   {<br \/>   return (NULL);<br \/>   }<br \/>   if ((!IS_WIDE_STRING_DTP (dtp1)) || (!IS_WIDE_STRING_DTP (dtp2)))<br \/>   {<br \/>   sqlr_new_error (&quot;22023&quot;, &quot;SR007&quot;,<br \/>   &quot;Function %s needs a nvstring or NULL as arguments, &quot;);<br \/>   }<br \/>   {<br \/>   wchar_t *str1 = (wchar_t*)arg1;<br \/>   wchar_t *str2 = (wchar_t*)arg2;<br \/>   int l1 = wcslen(str1);<br \/>   int l2 = wcslen(str2);<br \/>   int dist = l_dist_raw(str1, str2, l1, l2);<br \/>   int score = 100 - (dist * 100)\/((l1 &gt; l2)? 
l1: l2);<br \/>   if (str1[0] != str2[0])<br \/>   score = (score * 3)&gt;&gt;2;<br \/>   return score;<br \/>   }<br \/>  }<br \/>  static int g_cnt = 0;<br \/>  #if defined WIN32 &#038;&#038; defined (_DEBUG)<br \/>   static _CrtMemState checkPt1;<br \/>  #endif<\/p>\n<p>  long sqrt_long(long r)<br \/>  {<br \/>   long t, b, c = 0;<br \/>   assert (r &gt;= 0);<\/p>\n<p>  for (b=0x10000000; b != 0; b &gt;&gt;= 2) <br \/>   {<br \/>   t = c + b;<br \/>   c &gt;&gt;= 1;<br \/>   if (t &lt;= r) <br \/>   {<br \/>   r -= t;<br \/>   c += b;<br \/>   }<br \/>   }<br \/>   return(c);<br \/>  }<\/p>\n<p>  caddr_t<br \/>  bif_query_phrase (caddr_t * qst, caddr_t * err_ret, state_slot_t ** args)<br \/>  {<br \/>   char *me = &quot;query_phrase&quot;;<br \/>   wchar_t **words = NULL;<br \/>   ptrlong *res = NULL;<br \/>   wchar_t *tmp = NULL;<br \/>   dk_set_t ids_list = NULL;<br \/>   caddr_t arr = NULL;<br \/>   caddr_t arg = bif_arg_unrdf (qst, args, 0, me);<br \/>   dtp_t dtp = DV_TYPE_OF (arg);<br \/>   int len = 0;<br \/>   mem_pool_t *mp = mem_pool_alloc();<\/p>\n<p>  #if 0\/\/defined WIN32 &#038;&#038; defined (_DEBUG)<br \/>   _CrtCheckMemory( );<br \/>   _CrtMemCheckpoint( &#038;checkPt1 );<br \/>  #endif<\/p>\n<p>  \/\/if (0 == (g_cnt%1000))<br \/>   \/\/printf (&quot;%d &quot;, g_cnt);<br \/>   ++g_cnt;<br \/>   if (DV_DB_NULL == dtp)<br \/>   {<br \/>   return (NULL);<br \/>   }<br \/>   if (IS_STRING_DTP(dtp))<br \/>   {<br \/>   tmp = box_utf8_as_wide_char (arg, NULL, strlen(arg), 0, DV_WIDE);<br \/>   words = nv_split (tmp);<br \/>   dk_free_box(tmp);<br \/>   }<br \/>   else if (IS_WIDE_STRING_DTP (dtp))<br \/>   {<br \/>   tmp = wcsdup ((const wchar_t *)arg);<br \/>   words = nv_split (tmp);<br \/>   free(tmp);<br \/>   }<br \/>   else<br \/>   {<br \/>   sqlr_new_error (&quot;22023&quot;, &quot;SR007&quot;,<br \/>   &quot;Function %s needs a nvstring or NULL as argument, &quot;<br \/>   &quot;not an arg of type %s (%d)&quot;,<br \/>   me, 1, 
dv_type_title (dtp), dtp);<br \/>   }<\/p>\n<p>  if (0 == ht_dict_-&gt;ht_count)<br \/>   {<br \/>   bif_reload_dict (qst, err_ret, args);<br \/>   }<\/p>\n<p>  \/\/mutex_enter (dict_mtx_);<\/p>\n<p>  if (words)<br \/>   {<br \/>   size_t niters = box_length(words)\/sizeof(void*);<br \/>   dk_set_t results = NULL;<br \/>   dk_set_t *iter_holders = mp_alloc_box (mp, niters * sizeof(dk_set_t), DV_ARRAY_OF_POINTER);<br \/>   dk_set_t *iters = mp_alloc_box (mp, niters * sizeof(dk_set_t), DV_ARRAY_OF_POINTER);<br \/>   size_t i = 0;<br \/>   size_t ix = 0;<br \/>   size_t cnt = 0;<br \/>   size_t cnt1 = 0;<br \/>   for (i = 0; i &lt;niters; i++)<br \/>   {<br \/>   ipair_t **res = get_word_candidates ((wchar_t*)words[i]);<br \/>   if (res)<br \/>   {<br \/>   iter_holders[ix] = load_oid_list (res, (query_instance_t *)qst, mp);<br \/>   iters[ix] = iter_holders[ix];<br \/>   ix++;<br \/>   dk_free_tree (res);<br \/>   }<br \/>   }<br \/>   niters = ix;<\/p>\n<p>  if (niters)<br \/>   {<br \/>   int64 min_elem = 0;<br \/>   int fin = 0;<br \/>   for (;!fin;)<br \/>   {<br \/>   int bfound = 1;<br \/>   size_t div = 1;<br \/>   size_t score = 1;<br \/>   size_t sumpos = 0;<br \/>   size_t oldpos = 0;<\/p>\n<p>  if (!iters[0])<br \/>   break;<br \/>   min_elem = ((ipair_t *)iters[0]-&gt;data)-&gt;id_;<br \/>   for (i = 0; i &lt;niters; i++)<br \/>   {<br \/>   if (iters[i])<br \/>   {<br \/>   ipair_t *ptr = (ipair_t *)iters[i]-&gt;data;<br \/>   div *= 100;<br \/>   score *= ptr-&gt;score_;<br \/>   if (i)<br \/>   {<br \/>   sumpos += abs (oldpos - ptr-&gt;pos_);<br \/>   }<br \/>   oldpos = ptr-&gt;pos_;<br \/>   if (ptr-&gt;id_ != min_elem)<br \/>   {<br \/>   bfound = 0;<br \/>   }<br \/>   if (ptr-&gt;id_ &lt; min_elem)<br \/>   {<br \/>   min_elem = ptr-&gt;id_;<br \/>   }<br \/>   }<br \/>   }<br \/>   if (bfound)<br \/>   {<br \/>   ipair_t *item = mp_alloc_box(mp, sizeof(ipair_t), DV_BIN);<br \/>   div \/= 100;<br \/>   score \/= div;<br \/>   if (niters &gt; 
1)<br \/>   sumpos \/= (niters - 1);<br \/>   item-&gt;id_ = min_elem;<br \/>   item-&gt;score_ = score\/(1 + (sqrt_long(((100*sumpos)\/5))\/10));<br \/>   mp_set_push(mp, &#038;results, item);<br \/>   cnt1++;<\/p>\n<p>  \/\/printf (&quot;FOUND:%I64d %d\\n&quot;, min_elem, score); <br \/>   }<br \/>   for (i = 0; i &lt;niters; i++)<br \/>   {<br \/>   int bf = bfound;<br \/>   while (iters[i] &#038;&#038; (bf || min_elem == ((ipair_t *)iters[i]-&gt;data)-&gt;id_))<br \/>   {<br \/>   bf = 0;<br \/>   iters[i] = iters[i]-&gt;next;<br \/>   }<br \/>   if (!iters[i])<br \/>   {<br \/>   fin = 1;<br \/>   break;<br \/>   }<br \/>   }<br \/>   }<br \/>   }<\/p>\n<p>  for (i = 0; i &lt;niters; i++)<br \/>   {<br \/>   DO_SET (ipair_t *, item, &#038;iter_holders[i])<br \/>   {<br \/>   cnt++;<br \/>   \/\/dk_free_box(item);<br \/>   }<br \/>   END_DO_SET ();<br \/>   }<br \/>   \/\/dk_free_box (iters);<br \/>   \/\/dk_free_box (iter_holders);<\/p>\n<p>  \/\/printf (&quot;-%d-&quot;, cnt);<br \/>   len = dk_set_length(results);<br \/>   \/\/if (len &gt; 100)<br \/>   { <br \/>   results = list_sort (results, compare_by_score, NULL);<br \/>   i = 0;<br \/>   DO_SET (ipair_t *, entry, &#038;results)<br \/>   {<br \/>   entry-&gt;len_ = (i &gt;= 100)? 
0:1;<br \/>   i++;<br \/>   }<br \/>   END_DO_SET();<br \/>   len = (len&gt;100)?100:len;<br \/>   }<br \/>   \/\/results = list_sort (results, compare_by_id, NULL);<br \/>   i = 0;<br \/>   res = dk_alloc_box(len * 2 * sizeof(ptrlong), DV_ARRAY_OF_LONG);<br \/>   DO_SET (ipair_t *, entry, &#038;results)<br \/>   {<br \/>   if (entry-&gt;len_)<br \/>   {<br \/>   res[i++] = (entry-&gt;id_);<br \/>   res[i++] = (entry-&gt;score_);<br \/>   }<br \/>   \/\/dk_free_box(entry);<br \/>   cnt1--;<br \/>   }<br \/>   END_DO_SET();<br \/>   \/\/dk_set_free (results);<br \/>   dk_free_tree (words);<br \/>   \/\/printf(&quot;(%d)&quot;, cnt1);<br \/>   }<br \/>   \/\/mutex_leave (dict_mtx_);<br \/>   mp_free (mp);<\/p>\n<p>  #if 0\/\/defined WIN32 &#038;&#038; defined (_DEBUG)<br \/>   \/\/ _CrtMemDumpAllObjectsSince( NULL );<br \/>   _CrtMemDumpAllObjectsSince( &#038;checkPt1 );<br \/>   _CrtMemCheckpoint( &#038;checkPt1 );<br \/>   _CrtMemDumpStatistics( &#038;checkPt1 );<br \/>   _CrtCheckMemory( );<br \/>  #endif<br \/>   return (caddr_t)res;<br \/>  }<\/p>\n<p>  void<br \/>  init_dict (void)<br \/>  {<br \/>   dict_mtx_ = mutex_allocate ();<br \/>   ht_dict_ = id_hash_allocate (2039, sizeof (caddr_t), sizeof (caddr_t), strhash, strhashcmp);<br \/>   ht_triples_ = id_hash_allocate (2039, sizeof (lenmem_t), sizeof (caddr_t), lenmemhash, lenmemhashcmp);<br \/>   ht_dict_by_id_ = hash_table_allocate (2039);<\/p>\n<p>  bif_define (&quot;nv_split&quot;, bif_nv_split);<br \/>   bif_define (&quot;treat_nword&quot;, bif_treat_nword);<br \/>   bif_define (&quot;calc_similarity&quot;, bif_calc_similarity);<br \/>   bif_define (&quot;reload_dict&quot;, bif_reload_dict);<br \/>   bif_define (&quot;get_word_candidates&quot;, bif_get_word_candidates);<br \/>   bif_define (&quot;query_phrase&quot;, bif_query_phrase);<br \/>  }<\/p>\n<p>  void finit_dict()<br \/>  {<br \/>   flush_triples();<br \/>   flush_dict();<\/p>\n<p>  hash_table_free (ht_dict_by_id_);<br \/>   id_hash_free 
(ht_triples_);<br \/>   id_hash_free (ht_dict_);<br \/>   mutex_free (dict_mtx_);<br \/>  }<\/p>\n<p>  extern int f_foreground;<\/p>\n<p>  int<br \/>  main (int argc, char *argv[])<br \/>  {<br \/>   \/*f_foreground = 1;<br \/>   * FIXME: this could not be done in that way; this is a GPF on WIN32 and<br \/>   * copy on write on linux; a function from the shared object must be used<br \/>   * to set it<br \/>   *\/<br \/>  #ifdef MALLOC_DEBUG<br \/>   dbg_malloc_enable();<br \/>  #endif<br \/>   build_set_special_server_model (&quot;Mircalo&quot;);<br \/>   VirtuosoServerSetInitHook (init_dict);<br \/>   return VirtuosoServerMain (argc, argv);<br \/>  }  <\/div>\n<\/div>\n<p>  <b>PPS<\/b>: \u0432 \u043a\u0430\u0447\u0435\u0441\u0442\u0432\u0435 \u0438\u043b\u043b\u044e\u0441\u0442\u0440\u0430\u0446\u0438\u0438 \u0438\u0441\u043f\u043e\u043b\u044c\u0437\u043e\u0432\u0430\u043d\u0430 \u0440\u0430\u0431\u043e\u0442\u0430 \u0412\u043b\u0430\u0434\u0438\u043c\u0438\u0440\u0430 \u0420\u0443\u043c\u044f\u043d\u0446\u0435\u0432\u0430, \u0438\u0437\u043e\u0431\u0440\u0430\u0436\u0435\u043d\u0438\u0435 \u043a\u043e\u0442\u043e\u0440\u043e\u0439 \u0432\u0437\u044f\u0442\u043e <a href=\"http:\/\/blogs.privet.ru\/community\/Cultural_groups\/103537706\">\u0437\u0434\u0435\u0441\u044c<\/a>.    
\t<\/p>\n<div class=\"clear\"><\/div>\n<\/p><\/div>\n<p> \u0441\u0441\u044b\u043b\u043a\u0430 \u043d\u0430 \u043e\u0440\u0438\u0433\u0438\u043d\u0430\u043b \u0441\u0442\u0430\u0442\u044c\u0438 <a href=\"http:\/\/habrahabr.ru\/post\/206066\/\"> http:\/\/habrahabr.ru\/post\/206066\/<\/a><\/p>\n","protected":false},"excerpt":{"rendered":"<div class=\"content html_format\">   \t<img decoding=\"async\" src=\"http:\/\/habr.habrastorage.org\/post_images\/132\/9fd\/1f6\/1329fd1f6566c35ae7238856584e78f5.jpg\" alt=\"\u0412\u043b\u0430\u0434\u0438\u043c\u0438\u0440 \u0420\u0443\u043c\u044f\u043d\u0446\u0435\u0432 - \u043f\u0440\u0438\u043a\u043b\u044e\u0447\u0435\u043d\u0438\u044f \u041f\u0438\u0442\u0435\u0440\u0441\u043a\u043e\u0433\u043e... \u043a\u043e\u0442\u0430\"\/><br \/>  \u0421\u0443\u0449\u0435\u0441\u0442\u0432\u0443\u0435\u0442 \u0443\u0441\u0442\u043e\u0439\u0447\u0438\u0432\u043e\u0435 \u043c\u043d\u0435\u043d\u0438\u0435, \u0447\u0442\u043e \u043d\u0435\u0447\u0435\u0442\u043a\u0438\u0439 \u043f\u043e\u0438\u0441\u043a \u0432 \u0434\u0438\u043d\u0430\u043c\u0438\u043a\u0435 (\u043e\u043d\u043b\u0430\u0439\u043d) <br \/>  \u043c\u0430\u043b\u043e\u0434\u043e\u0441\u0442\u0443\u043f\u0435\u043d \u0432 \u0441\u0438\u043b\u0443 \u0441\u0432\u043e\u0435\u0439 \u043d\u0435\u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e\u0439 \u0441\u043b\u043e\u0436\u043d\u043e\u0441\u0442\u0438. 
<br \/>  \u0414\u0430\u043b\u0435\u0435 \u043c\u044b \u0431\u0443\u0434\u0435\u043c \u0440\u0430\u0437\u0432\u0435\u0438\u0432\u0430\u0442\u044c \u044d\u0442\u043e \u0434\u043e\u0441\u0430\u0434\u043d\u043e\u0435 \u0437\u0430\u0431\u043b\u0443\u0436\u0434\u0435\u043d\u0438\u0435 \u0438 \u043f\u043e\u043a\u0430\u0436\u0435\u043c, <br \/>  \u0447\u0442\u043e \u043f\u043e\u0441\u0442\u0440\u043e\u0438\u0442\u044c \u0441\u0432\u043e\u044e \u0441\u043e\u0431\u0441\u0442\u0432\u0435\u043d\u043d\u0443\u044e \u043f\u043e\u0438\u0441\u043a\u043e\u0432\u0443\u044e \u0441\u0438\u0441\u0442\u0435\u043c\u0443 \u0441\u043e \u0441\u043d\u043e\u0441\u043d\u043e\u0439 \u043f\u0440\u043e\u0438\u0437\u0432\u043e\u0434\u0438\u0442\u0435\u043b\u044c\u043d\u043e\u0441\u0442\u044c\u044e<br \/>   \u043d\u0430 \u043d\u0435 \u0442\u0430\u043a\u0438\u0445 \u0443\u0436 \u0438 \u043c\u0430\u043b\u0435\u043d\u044c\u043a\u0438\u0445 \u0434\u0430\u043d\u043d\u044b\u0445 \u0434\u043e\u0441\u0442\u0443\u043f\u043d\u043e \u043a\u0430\u0436\u0434\u043e\u043c\u0443.  
<\/p>\n","protected":false},"author":1,"featured_media":0,"comment_status":"open","ping_status":"open","sticky":false,"template":"","format":"standard","meta":{"footnotes":""},"categories":[],"tags":[],"class_list":["post-206066","post","type-post","status-publish","format-standard","hentry"],"_links":{"self":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/206066","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=206066"}],"version-history":[{"count":0,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=\/wp\/v2\/posts\/206066\/revisions"}],"wp:attachment":[{"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=206066"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=206066"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/savepearlharbor.com\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=206066"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}