
halfak at svn
Apr 7, 2012, 11:09 AM
Post #1 of 1
(44 views)
Permalink
|
|
SVN: [114794] trunk/tools/wsor/newbie_warnings/R
|
|
https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114794 Revision: 114794 Author: halfak Date: 2012-04-07 18:08:59 +0000 (Sat, 07 Apr 2012) Log Message: ----------- old updates to unimportant scripts Modified Paths: -------------- trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R trunk/tools/wsor/newbie_warnings/R/outcomes.R trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R Modified: trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R =================================================================== --- trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R 2012-04-07 01:08:35 UTC (rev 114793) +++ trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R 2012-04-07 18:08:59 UTC (rev 114794) @@ -22,8 +22,6 @@ HUGGLING_CODINGS_MK2$experimental = HUGGLING_CODINGS_MK2$experimental == 1 - HUGGLING_CODINGS_MK2$edits_own_talkpage = HUGGLING_CODINGS_MK2$edits_own_talkpage > 0 - HUGGLING_CODINGS_MK2$edits_hugglers_talkpage = HUGGLING_CODINGS_MK2$edits_hugglers_talkpage > 0 HUGGLING_CODINGS_MK2$responds_own_talk = HUGGLING_CODINGS_MK2$responds_own_talk > 0 HUGGLING_CODINGS_MK2$responds_elsewhere = HUGGLING_CODINGS_MK2$responds_elsewhere > 0 HUGGLING_CODINGS_MK2$is_anon = HUGGLING_CODINGS_MK2$is_anon > 0 Modified: trunk/tools/wsor/newbie_warnings/R/outcomes.R =================================================================== --- trunk/tools/wsor/newbie_warnings/R/outcomes.R 2012-04-07 01:08:35 UTC (rev 114793) +++ trunk/tools/wsor/newbie_warnings/R/outcomes.R 2012-04-07 18:08:59 UTC (rev 114794) @@ -100,6 +100,49 @@ )) # +# +# +regressions = function(messaged_codings){ + outcomes = list() + for(outcome in c("stay", "improves", "contact", "good_contact", "good_outcome")){ + cat("Processing:", outcome) + groups = list() + for(group in c("unlikely", "possible", "golden")){ + cat(".") + group_codings = messaged_codings[ + messaged_codings$group == group, + ] + model = glm( + group_codings[[outcome]] ~ + anon + + ntalk_edits_before_msg + + talk_edits_before_msg + + teaching * personal, + data = group_codings + ) + + ncoefs = length(model$coefficients) + features = list() + i = 0 + for(feature in names(model$coefficients)){ + i = i+1 + features[[feature]] = list( + coef=coefficients(summary(model))[i], + error=coefficients(summary(model))[i+ncoefs], + pval=coefficients(summary(model))[i+ncoefs*3] + ) + } + groups[[group]] = features + } + cat("\n") + outcomes[[outcome]] = groups + } + outcomes +} +#outcomes = regressions(messaged_codings) + + +# # Try removing teaching*personal. # @@ -113,27 +156,27 @@ cat("============================================================\n") print(summary(glm( - good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image, + good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal, data = group_codings ))) print(summary(glm( - improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image, + improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal, data = group_codings ))) print(summary(glm( - contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image, + contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal, data = group_codings ))) print(summary(glm( - good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image, + good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal, data = group_codings ))) print(summary(glm( - stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image, + stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal, data = group_codings ))) Modified: trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R =================================================================== --- trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R 2012-04-07 01:08:35 UTC (rev 114793) +++ trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R 2012-04-07 18:08:59 UTC (rev 114794) @@ -1,19 +1,7 @@ source("loader/load_huggling_codings_mk2.R") library(doBy) -hugglings = load_huggling_codings_mk2() +hugglings = load_huggling_codings_mk2(reload=T) -#hugglingCounts = summaryBy( -# recipient ~ recipient, -# data = hugglings, -# FUN=length -#) -#hugglingCounts$count = hugglingCounts$recipient.length -#hugglingCounts$recipient.length = NULL -# -#hugglings = merge(hugglings, hugglingCounts, by=c("recipient")) - -#huggling_codings = load_huggling_codings(reload=T) -#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),] ifNA = function(val, naThen){ if(is.na(val)){ naThen @@ -33,13 +21,47 @@ F ) ) -hugglings$good_contact = hugglings$contact & !hugglings$retaliates -hugglings$stay = !is.na(hugglings$after_rating) -hugglings$improves = hugglings$after_rating > hugglings$before_rating +hugglings$good_contact = mapply( + function(contact, retaliates){ + if(contact){ + !retaliates + }else{ + NA + } + }, + hugglings$contact, + hugglings$retaliates +) +hugglings$stay = !is.na(hugglings$after_rating) +hugglings$active = hugglings$edits_after_msg_3days > 0 +hugglings$warned_again = hugglings$warnings_after_72hrs > 0 +hugglings$blocked = !is.na(hugglings$blocked_after_msg_seconds) & hugglings$blocked_after_msg_seconds < 604800 +hugglings$improves = hugglings$after_rating > hugglings$before_rating +hugglings$other_user_talk_edits = hugglings$user_talk_edits_after_msg - hugglings$edits_own_talkpage hugglings$talk_edits_before_msg = with( hugglings, - user_talk_edits_after_msg + article_talk_edits_before_msg + user_talk_edits_before_msg + article_talk_edits_before_msg ) +hugglings$talk_edits_after_msg = with( + hugglings, + user_talk_edits_after_msg + article_talk_edits_after_msg +) +hugglings$article_talk_before = hugglings$article_talk_edits_before_msg +hugglings$user_talk_before = hugglings$user_talk_edits_before_msg + + +hugglings$db_good_communication = mapply( + function(other_user_talk_edits, warned_again){ + if(other_user_talk_edits){ + !warned_again + }else{ + NA + } + }, + hugglings$other_user_talk_edits, + hugglings$warned_again +) + # Can't do it #messaged_codings$ntalk_edits_before_msg = with( # messaged_codings, @@ -109,7 +131,179 @@ } )) +pval = function(val){ + if(is.na(val)){ + "<span style=\"color: #ccc;\">---</span>" + } + else if(val < 0.05){ + if(val < 0.001){ + val = "< .001" + }else{ + val = round(val, 3) + } + paste("'''", val, "'''", sep="") + }else if(val < 0.10){ + paste("<span style=\"text-decoration: underline;\">", round(val, 3), "</span>", sep="") + }else{ + paste("<span style=\"color: #ccc;\">", round(val, 3), "</span>", sep="") + } +} +model_outcomes = function(model){ + ncoefs = length(model$coefficients) + coefs = list() + i = 0 + for(feature in names(model$coefficients)){ + i = i+1 + coefs[[feature]] = list( + coef=coefficients(summary(model))[i], + error=coefficients(summary(model))[i+ncoefs], + pval=coefficients(summary(model))[i+ncoefs*3] + ) + } + outcomes = list(coefs=coefs) + smry = summary(model) + if(!is.null(smry$r.squared)){ + outcomes$fitness = paste("R-squared:", round(smry$r.squared,2)) + }else{ + outcomes$fitness = paste("AIC:", round(smry$aic, 2)) + } + outcomes +} + +linear_outcomes = function(f){ + outcomes = list() + outcome_names = c( + "ns0_edits_after_msg", + "talk_edits_after_msg", + "article_talk_edits_after_msg", + "user_talk_edits_after_msg", + "edits_hugglers_talkpage", + "other_user_talk_edits" + ) + for(outcome in outcome_names){ + model = lm( + f[[outcome]] ~ + (personal + nodirectives) * + ( + warning_first_msg + + is_anon + + is_shared_ip + + ns0_edits_before_msg + + talk_edits_before_msg + ), + data=f + ) + outcomes[[outcome]] = model_outcomes(model) + } + outcomes +} +logistic_outcomes = function(f){ + outcomes = list() + outcome_names = c( + "stay", + "active", + "improves", + "contact", + "good_contact", + "warned_again", + "blocked", + "db_good_communication" + ) + for(outcome in outcome_names){ + model = glm( + f[[outcome]] ~ + (personal + nodirectives) * + ( + warning_first_msg + + is_anon + + is_shared_ip + + ns0_edits_before_msg + + talk_edits_before_msg + ), + data=f, + family=binomial("logit") + ) + outcomes[[outcome]] = model_outcomes(model) + } + outcomes +} +outcome_coefs = list( + overall = c( + linear_outcomes(hugglings), + logistic_outcomes(hugglings) + ), + vandal = c( + linear_outcomes(hugglings[hugglings$group == "vandal",]), + logistic_outcomes(hugglings[hugglings$group == "vandal",]) + ), + "bad faith" = c( + linear_outcomes(hugglings[hugglings$group == "bad faith",]), + logistic_outcomes(hugglings[hugglings$group == "bad faith",]) + ), + test = c( + linear_outcomes(hugglings[hugglings$group == "test",]), + logistic_outcomes(hugglings[hugglings$group == "test",]) + ), + "good faith" = c( + linear_outcomes(hugglings[hugglings$group == "good faith",]), + logistic_outcomes(hugglings[hugglings$group == "good faith",]) + ) +) +for(outcome in names(outcome_coefs$overall)){ + cat("==Outcome:", outcome, "==\n") + cat('{| class="wikitable"\n') + cat('| align="center" style="background:#f0f0f0;font-weight:bold;"|Coefficients:\n') + cat( + '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|overall', + paste("(", outcome_coefs[["overall"]][[outcome]]$fitness, ")", sep=""), + '||\n' + ) + cat( + '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|vandal', + paste("(", outcome_coefs[["vandal"]][[outcome]]$fitness, ")", sep=""), + '||\n' + ) + cat( + '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|bad faith', + paste("(", outcome_coefs[["bad faith"]][[outcome]]$fitness, ")", sep=""), + '||\n' + ) + cat( + '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|test', + paste("(", outcome_coefs[["test"]][[outcome]]$fitness, ")", sep=""), + '||\n' + ) + cat( + '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|good faith', + paste("(", outcome_coefs[["good faith"]][[outcome]]$fitness, ")", sep=""), + '||\n' + ) + cat("|-\n! ") + cat("||coef||error||p-val||") + cat("||coef||error||p-val||") + cat("||coef||error||p-val||") + cat("||coef||error||p-val||") + cat("||coef||error||p-val||\n") + for(feature in names(outcome_coefs$overall$stay$coefs)){ + cat('|-\n| ', feature) + for(group in names(outcome_coefs)){ + coefs = outcome_coefs[[group]][[outcome]]$coefs[[feature]] + cat( + "", + round(coefs$coef, 3), + round(coefs$error, 3), + pval(coefs$pval), + "", + sep="||" + ) + } + cat("\n") + } + cat("|}\n\n") +} + + for(group in c("vandal", "bad faith", "test", "good faith")){ group_codings = hugglings[hugglings$group == group,] @@ -118,33 +312,130 @@ cat("============================================================\n") print(summary(glm( - good_outcome ~ is_anon + personal + nodirectives, - data = group_codings + good_outcome ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") ))) print(summary(glm( - improves ~ is_anon + personal + nodirectives, - data = group_codings[group_codings$before_rating <= 4,] + improves ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings[group_codings$before_rating <= 4,], + family=binomial(link="logit") ))) print(summary(glm( - contact ~ is_anon + personal + nodirectives, - data = group_codings + contact ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") ))) print(summary(glm( - good_contact ~ is_anon + personal + nodirectives, - data = group_codings[group_codings$contact,] + good_contact ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings[group_codings$contact,], + family=binomial(link="logit") ))) + cat("Stay = after_rating != NA") print(summary(glm( - stay ~ is_anon + personal + nodirectives, + stay ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") + ))) + + cat("Active = edits_after_msg_3days > 0") + print(summary(glm( + active ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") + ))) + + cat("Warned_again = warnings_after_24hrs > 0") + print(summary(glm( + warned_again ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") + ))) + + cat("Blocked = blocked_after_msg_seconds != NA & blocked_after_msg_seconds < 604800") + print(summary(glm( + blocked ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings, + family=binomial(link="logit") + ))) + + print(summary(glm( + ns0_edits_after_msg ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), data = group_codings ))) + print(summary(glm( + user_talk_edits_after_msg ~ + (personal + nodirectives) * + (warning_first_msg + + is_shared_ip + + is_anon + + ns0_edits_before_msg + + user_talk_edits_before_msg), + data = group_codings + ))) cat("\n\n\n") } + meanNoNA = function(x){ mean(x, na.rm=T) } _______________________________________________ MediaWiki-CVS mailing list MediaWiki-CVS [at] lists https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs
|