commit 38691d9: [Project] Some efforts to fix rescore
Vsevolod Stakhov
vsevolod at highsecure.ru
Mon Jul 1 16:28:10 UTC 2019
Author: Vsevolod Stakhov
Date: 2019-07-01 15:05:52 +0100
URL: https://github.com/rspamd/rspamd/commit/38691d998d019ac0fba95720c337e3f9badf55c4
[Project] Some efforts to fix rescore
---
lualib/rspamadm/rescore.lua | 31 ++++++++++---------------------
1 file changed, 10 insertions(+), 21 deletions(-)
diff --git a/lualib/rspamadm/rescore.lua b/lualib/rspamadm/rescore.lua
index dfa73f2d5..9b4d3a4ce 100644
--- a/lualib/rspamadm/rescore.lua
+++ b/lualib/rspamadm/rescore.lua
@@ -14,10 +14,6 @@ See the License for the specific language governing permissions and
limitations under the License.
]]--
-if not rspamd_config:has_torch() then
- return
-end
-
local lua_util = require "lua_util"
local ucl = require "ucl"
local logger = require "rspamd_logger"
@@ -26,9 +22,6 @@ local rspamd_util = require "rspamd_util"
local argparse = require "argparse"
local rescore_utility = require "rescore_utility"
--- Load these lazily
-local torch
-local nn
local opts
local ignore_symbols = {
@@ -137,17 +130,17 @@ parser:option "--l2"
local function make_dataset_from_logs(logs, all_symbols, spam_score)
-- Returns a list of {input, output} for torch SGD train
- local dataset = {}
+ local inputs = {}
+ local outputs = {}
for _, log in pairs(logs) do
- local input = torch.Tensor(#all_symbols)
- local output = torch.Tensor(1)
+
log = lua_util.rspamd_str_split(log, " ")
if log[1] == "SPAM" then
- output[1] = 1
+ outputs[#outputs+1] = 1
else
- output[1] = 0
+ outputs[#outputs+1] = 0
end
local symbols_set = {}
@@ -158,23 +151,19 @@ local function make_dataset_from_logs(logs, all_symbols, spam_score)
end
end
+ local input_vec = {}
for index, symbol in pairs(all_symbols) do
if symbols_set[symbol] then
- input[index] = 1
+ input_vec[index] = 1
else
- input[index] = 0
+ input_vec[index] = 0
end
end
- dataset[#dataset + 1] = {input, output}
-
- end
-
- function dataset:size()
- return #dataset
+ inputs[#inputs + 1] = input_vec
end
- return dataset
+ return inputs,outputs
end
local function init_weights(all_symbols, original_symbol_scores)
More information about the Commits
mailing list