// MediaWiki:Common.js/Wikinews:Article principal/leadGenerator
// Load the API helper script. NOTE(review): presumably this is what provides the
// global api() and Bawolff.mwapi used in this file - confirm.
mw.loader.load('//en.wikinews.org/w/index.php?title=user:Bawolff/mwapilib2.js&action=raw&ctype=text/javascript');

// Create the shared namespace only when no other script has created it yet.
if (!window.Bawolff) {
    window.Bawolff = {};
}

// Example of the API request used to inspect the current leads:
// //en.wikinews.org/w/api.php?action=query&prop=revisions&titles=template:Lead%20article%201|template:Lead%20article%202|template:Lead%20article%203|template:Lead%20article%204|template:Lead%20article%205&rvprop=timestamp|content

/**
 * Entry point of the lead generator.
 *
 * Call as Bawolff.leadGen(pageName, 1, alert), replacing 1 with which lead (1-5)
 * and alert with your callback. Optionally takes a summary method (as a numeric
 * argument, 0-4; see Bawolff.leadGen.takeIntro).
 *
 * The work is a chain: fetch the page, run Bawolff.leadGen.extract on it, run
 * Bawolff.leadGen.create on the result, then hand over to the callback.
 * NOTE(review): this assumes lift() passes the previous step's result as the
 * first argument followed by the extra arguments given here (which matches the
 * signatures of extract and create) - confirm against the api library.
 *
 * @param {string} title - name of the article to build a lead from
 * @param {number|string} leadNumb - which lead template to target
 * @param {Function} callback - presumably receives (generated wikitext, title)
 * @param {number} [summaryMethod] - synopsis extraction method, 0-4
 */
Bawolff.leadGen = function (title, leadNumb, callback, summaryMethod) {
    api(title).getPage().lift(Bawolff.leadGen.extract, title, summaryMethod).lift(Bawolff.leadGen.create, leadNumb).lift(callback, title).exec();
};
/********
This is a list that maps categories/infoboxes to generic images.
It is used as a fallback when the article itself contains no image.
Note: the category map only works with categories explicitly included;
      it does not count categories included by templates.
Note: this does not consider templates with parameters.
Keys are category/template names with an upper-case first letter; values are
file names without the namespace prefix.
*/
Bawolff.leadGen.imgMap = {
    "Brésil": "Flag of Brazil.svg",
    "Canada": "Flag of Canada.svg",
    "États-Unis d'Amérique": "Flag of the United States.svg",
    "France": "Flag of France.svg",
    "Informatique": "Computer-aj aj ashton 01.svg",
    "Mexique": "Flag of Mexico.svg",
    "Nécrologie": "Wikinews tag obituary.png",
    "Ontario": "Flag of Ontario.svg",
    "Québec": "Flag of Quebec.svg",
    "Royaume-Uni": "Flag of the United Kingdom.svg",
    "Football": "Wikinews-football.svg",
    "Science et technologie": "Science-symbol-2.svg"
};
/**
 * Takes the source of a wiki page and extracts the first image name, without
 * the leading namespace (Image:, File: or Fichier:, any letter case).
 *
 * When the page has no image link, falls back to Bawolff.leadGen.imgMap:
 * first every parameterless template ({{Name}}) is tried, then every category
 * link, and the first name found in the map wins. If nothing matches, the
 * Wikinews logo is returned.
 *
 * @param {string} page - wikitext of the article
 * @returns {string} file name of the image to use
 */
Bawolff.leadGen.extractImg = function (page) {
    var imgRegex = /\[\[(?:[iI][mM][aA][gG][eE]\:|[fF][Ii][lL][eE]\:|[Ff][Ii][cC][hH][iI][eE][rR]\:)((?:[^\|\]])*?\.[pPsSjJgG][nNvVpPiI][gGeEfF][gG]?)[\|\]]?/;
    var img = imgRegex.exec(page);
    if (img && img.length >= 2) {
        return img[1];
    }

    var infoboxRegex = /\{\{([^|}]*)\}\}/g; //note doesn't match infoboxes w/params
    var categoryRegex = /\[\[[cC]at[eé]gor[yi]e?:([^|\]]*)(?:\|[^\]]*)?\]\]/g;
    //js seems to reuse these objects from prev calls without resetting lastIndex. reset lastIndex.
    infoboxRegex.lastIndex = categoryRegex.lastIndex = 0;

    // Walks every match of a global regex and returns the mapped image of the
    // first captured name present in imgMap, or null when none is mapped.
    var firstMapped = function (regex) {
        var map = Bawolff.leadGen.imgMap;
        var found, key;
        while ((found = regex.exec(page))) {
            key = found[1];
            // imgMap keys start with an upper-case letter.
            key = key.charAt(0).toUpperCase() + key.substring(1);
            // Own-property check so names such as "__proto__" cannot hit Object.prototype.
            if (Object.prototype.hasOwnProperty.call(map, key) && map[key]) {
                return map[key];
            }
        }
        return null;
    };

    // Templates take precedence over categories; the logo is the default.
    // (alternative default: return "";)
    return firstMapped(infoboxRegex) || firstMapped(categoryRegex) || "Wikinews-logo.png";
};
/**
 * Extracts the audio file name from the first {{Version audio|...}} template.
 * Example of the matched wikitext: {{Version audio|Ares I-X lancement réussi.ogg}}
 *
 * @param {string} page - wikitext of the article
 * @returns {string} first template parameter, or "" when the template is absent
 */
Bawolff.leadGen.extractAudio = function (page) {
    var res = page.match(/\{\{[vV]ersion audio\|([^|}]*)\|?[^}]*\}\}/);
    if (res) {
        return res[1];
    }
    return "";
};

/**
 * Extracts the first parameter of the first {{Date|...}} template.
 *
 * @param {string} page - wikitext of the article
 * @returns {string} first template parameter, or "" when the template is absent
 */
Bawolff.leadGen.extractDate = function (page) {
    var ress = page.match(/\{\{[Dd]ate\|([^|}]*)\|?[^}]*\}\}/);
    if (ress) {
        return ress[1];
    }
    return "";
};

/**
 * Classifies the article from the marker templates it contains.
 *
 * Valid types are: breaking, special, original, exclusive, urgent or none.
 * This currently does not detect special or urgent.
 * The checks run in order, so {{En}}/{{En cours}} wins over {{Interview}},
 * which wins over {{Reportage}}/{{Reportage original}}.
 *
 * @param {string} page - wikitext of the article
 * @returns {string} "breaking", "exclusive", "original" or "none"
 */
Bawolff.leadGen.extractType = function (page) {
    if (page.match(/\{\{[eE]n(?: cours)?\}\}/)) {
        return "breaking";
    }
    if (page.match(/\{\{[iI]nterview(?:\|[^}]*)?\}\}/i)) {
        return "exclusive";
    }
    if (page.match(/\{\{[rR]eportage(?: original)?(?:\|[^}]*)?\}\}/i)) {
        return "original";
    }
    //default
    return "none";
};
/**
 * Produces a plain-text synopsis from the wikitext of an article.
 *
 * The text is first cleaned: {{w|...}} and {{formatnum:...}} are unwrapped,
 * quotation templates become « ... », remaining templates, <ref> tags and
 * image links are removed, and internal/external links are reduced to their
 * label. Then a leading excerpt is taken according to `method`.
 *
 * Known limitations:
 *  - doesn't handle links that render as [1] (unlabelled external links).
 *  - intentionally doesn't strip ' chars (bold or italic), as this often
 *    marks things with periods (E. coli).
 *
 * @param {string} pageText - wikitext of the article
 * @param {number} method - must be a number; no type conversion is performed
 *        (the switch compares strictly):
 *          0: 1st sentence (also used for any other value)
 *          1: 1st two sentences
 *          2: 1st paragraph
 *          3: 1st 250 characters (+ a couple so we don't end in the middle of a word), or paragraph
 *          4: 1st 500 characters-ish, or paragraph
 * @returns {string} the synopsis; methods 3 and 4 append "..."
 * @throws {Error} when pageText is a redirect (after alerting the user)
 */
Bawolff.leadGen.takeIntro = function (pageText, method) {
    //first test for redirects.
    var isRedirect = pageText.match(/^#redirect\s?\[\[([^\]]*)\]\]/i);
    if (isRedirect) {
        alert("Il semble que vous essayez d\'utiliser la création d\'articles principaux sur un page de redirection. Utiliser s\'il vous plaît le nom réel de la page au lieu de (" + isRedirect[1] + ").");
        throw new Error("La page est une redirection. Merci de résoudre manuellement à : " + isRedirect[1] );
    }

    var fixWLink = /\{\{[wW]\|([^\}]+)\}\}/g;
    var fixformatnum = /\{\{formatnum\:([^\}]+)\}\}/g;
    // Matches templates nested up to four levels deep.
    var stripTemplates = /\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*(?:\{\{[^\}\{]*\}\})?\}\})?\}\})?\}\}/g;
    var stripCitations = /\{\{(?:"\s?"|Guil|Citation1?)\|([^}]*)\}\}/g; //note: this misinterprets template:""
    var stripRefs = /\<ref[^>]*\>[\s\S]*?\<\/ref>/g;
    //the img regex looks for the start of the image, then checks for nested internal links, external links, and for the ending ]] in the caption.
    //templates should already be stripped at this point. (fr image = Fichier)
    var img = /\[\[[IifF][mMIi][aALlcC][GgEehH][eEiI]?[eE]?[rR]?\:(?:\[\[(?:[^\]]*)\]\]|[^\]]|\](?!\]))*]]/g;

    //note, these are run multiple times to deal with nesting.
    pageText = pageText.replace(fixWLink, '$1');
    pageText = pageText.replace(fixformatnum, '$1');
    pageText = pageText.replace(stripCitations, '« $1 »');
    pageText = pageText.replace(stripCitations, '« $1 »');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripTemplates, '');
    pageText = pageText.replace(stripRefs, '');
    pageText = pageText.replace(img, '');

    // Reduce links to their visible label.
    var pipedLink = /\[\[[^\]\|]*\|([^\]\|]*)\]\]/g;
    pageText = pageText.replace(pipedLink, '$1');
    var normLink = /\[\[([^\]\|]*)\]\]/g;
    pageText = pageText.replace(normLink, '$1');
    var extLink = /\[(?:http|ftp|gopher|irc|https)\:[^\]\s]*\s?([^\]]*)]/g;
    pageText = pageText.replace(extLink, '$1');

    var firstPar;
    var suffix = "";
    switch (method) {
        case 1: //1st 2 sentences or 1st paragraph
            firstPar = /[^\n]+?\s[^\s\.]*\.(?=\s)(?:[^\n]+?\s[^\s\.]*\.(?=\s)|(?=\s))/;
            break;
        case 2: //1st paragraph
            firstPar = /[^\n]+?(?=\n)/;
            break;
        case 3: // 1st 250 characters-ish
            firstPar = /[^\n]{2,250}.*?\b/;
            suffix = "...";
            break;
        case 4: //1st 500 characters-ish
            firstPar = /[^\n]{2,500}.*?\b/;
            suffix = "...";
            break;
        default: //aka case 0. 1st sentence
            firstPar = /[^\n]+?\s[^\s\.]*\.(?=\s)/;
            break;
    }

    var excerpt = pageText.match(firstPar);
    if (excerpt) {
        return excerpt[0] + suffix;
    }

    // No sentence/paragraph boundary was found (e.g. a single sentence with no
    // trailing whitespace). Fall back to the first non-empty line instead of
    // failing with a TypeError on a null match.
    var firstLine = pageText.match(/[^\n]+/);
    return firstLine ? firstLine[0] : "";
};
/**
 * Collects everything needed to build a lead from an article's wikitext.
 *
 * @param {string} pageText - wikitext of the article
 * @param {string} pageName - title of the article
 * @param {number} [summaryMethod] - forwarded to Bawolff.leadGen.takeIntro
 * @returns {{width: string, image: string, title: string, synopsis: string,
 *            edit_this: string, audio: string, date: string}}
 * @throws {Error} propagated from takeIntro when the page is a redirect
 */
Bawolff.leadGen.extract = function (pageText, pageName, summaryMethod) {
    //editlink is overridden later: Bawolff.leadGen.create does not read
    //edit_this and derives the edit target from the lead number instead.
    return {
        width: '200',
        image: Bawolff.leadGen.extractImg(pageText),
        title: pageName,
        synopsis: Bawolff.leadGen.takeIntro(pageText, summaryMethod),
        edit_this: 'Wikinews:Bac_à_sable',
        audio: Bawolff.leadGen.extractAudio(pageText),
        date: Bawolff.leadGen.extractDate(pageText)
    };
};
/**
 * Serialises a lead description into the wikitext of an
 * "Article principal" template call, followed by the documentation template.
 *
 * @param {Object} leadObj - object with image, width, date, title, audio and
 *        synopsis fields (as produced by Bawolff.leadGen.extract)
 * @param {number|string} leadNumb - lead number, used for the "modif" target
 * @returns {string} wikitext of the lead
 */
Bawolff.leadGen.create = function (leadObj, leadNumb) {
    // The opening braces are written as separate literals, as in the original;
    // presumably so this script page itself is not parsed as a template call - TODO confirm.
    var pieces = [
        '{' + '{Article principal',
        "\n |modif=Modèle:Une " + leadNumb,
        "\n |Image=" + leadObj.image,
        "\n |Image width=" + leadObj.width,
        "\n |Image text=", //default to blank for now.
        "\n |bordure=1",
        "\n |thème=", //default to blank for now.
        "\n |thème nom=", //default to blank for now.
        "\n |date=" + leadObj.date,
        "\n |Titre=" + leadObj.title,
        "\n |audio=" + leadObj.audio,
        "\n |Synopsis=" + leadObj.synopsis,
        "\n}}\n{{",
        "Modèle:Article principal/Documentation}}"
    ];
    return pieces.join("");
};
//calls its argument giving it an object with meta info about the current leads.
//probably want to use Bawolff.leadGen.makeLeadTable instead.
/**
 * Fetches the current lead templates ("Modèle:Une 1" to "Modèle:Une 3") and
 * passes meta information about them to `callback`.
 *
 * The object handed over is keyed by template page title; each value is
 * {timestamp, title}, where timestamp is whatever Bawolff.mwapi.parseAPIDate
 * returns for the latest revision (presumably a Date - makeLeadTable calls
 * getTime() on it) and title is the value of the template's |Titre= parameter.
 *
 * Side effect: Bawolff.leadGen.oldestLead is set to the page title of the
 * lead with the oldest timestamp.
 *
 * NOTE(review): assumes lift() forwards the result of the request handler to
 * the callback - confirm against the api library.
 *
 * @param {Function} callback - receives the meta object described above
 */
Bawolff.leadGen.makeLeadMetaObject = function (callback) {
    // Pulls the |Titre= value out of a lead's wikitext and trims trailing
    // whitespace. When no |Titre= is present the text is returned unchanged
    // apart from the trimming.
    var exTitle = function (text) {
        text = text.replace(/[\s\S]*?\|Titre=([^\|]*)[\s\S]*/, '$1');
        return text.replace(/\s*$/, '');
    };

    // Converts the API response document into the meta object.
    var leadObj = function (doc) {
        var pages = doc.getElementsByTagName('page');
        var obj = {};
        var oldestTime = Infinity;
        for (var i = 0; i < pages.length; i++) {
            var pageTitle = pages[i].getAttribute('title');
            var rev = pages[i].getElementsByTagName('rev')[0];
            // Merge adjacent text nodes so firstChild holds the whole content.
            rev.normalize();
            var time = Bawolff.mwapi.parseAPIDate(rev.getAttribute('timestamp'));
            //This is really ugly...
            //put the oldest lead in a global variable.
            if (time < oldestTime) { //this compares milliseconds after epoch
                Bawolff.leadGen.oldestLead = pageTitle;
                oldestTime = time;
            }
            obj[pageTitle] = {
                timestamp: time,
                title: exTitle(rev.firstChild.data)
            };
        }
        return obj;
    };

    api().makeRequest({
        action: 'query',
        prop: 'revisions',
        titles: 'Modèle:Une 1|Modèle:Une 2|Modèle:Une 3',
        rvprop: 'timestamp|content',
        redirects: true
    }, leadObj).lift(callback).exec();
};

// Page title of the oldest lead; filled in by makeLeadMetaObject's response handler.
Bawolff.leadGen.oldestLead = null;
/**
 * Builds an HTML table describing the current leads (number, position,
 * article title and age in hours) and passes the markup to `callback`.
 *
 * NOTE(review): the original markup of this function was damaged (the HTML
 * inside the string literals had been rendered away). The table structure
 * below is reconstructed from the surviving column headers and cell order;
 * any attributes on the original <table> tag (e.g. a CSS class) are unknown
 * and should be restored from the page history if needed.
 *
 * @param {Function} callback - receives the HTML string of the table
 * @throws {Error} from Bawolff.leadGen.leadToPosition when a lead page title
 *         does not end in a known lead number
 */
Bawolff.leadGen.makeLeadTable = function (callback) {
    // The article title comes from editable wiki content, so escape it before
    // putting it into markup.
    var escapeHtml = function (text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;');
    };

    var wrapper = function (leadObj) {
        var now = (new Date()).getTime();
        var html = '<table>';
        html += '<thead><tr><th>#</th><th>Position</th><th>Article</th><th>Âge</th></tr></thead><tbody>';
        for (var i in leadObj) {
            if (leadObj.hasOwnProperty(i)) {
                var numb = i.charAt(i.length - 1); //last character of the page title
                var time = leadObj[i].timestamp.getTime(); //convert to milliseconds
                var delta = Math.round((now - time) / (1000*60*60)); //age in whole hours
                var timeStr;
                if (delta === 1) {
                    timeStr = delta + ' heure';
                }
                else {
                    timeStr = delta + ' heures';
                }
                html += '<tr><td>';
                html += numb;
                html += '</td><td>';
                html += Bawolff.leadGen.leadToPosition(numb);
                html += '</td><td>';
                html += escapeHtml(leadObj[i].title);
                html += '</td><td>';
                html += timeStr;
                html += '</td></tr>';
            }
        }
        html += '</tbody></table>';
        callback(html);
    };
    Bawolff.leadGen.makeLeadMetaObject(wrapper);
};
/**
 * Translates a lead number into the label of its position on the page.
 *
 * @param {number|string} numb - lead number 1-4 (numeric strings are accepted),
 *        or the character 'e', which is treated like 1
 * @returns {string} "En haut", "Milieu", "Milieu 2" or "En bas"
 * @throws {Error} when numb is not one of the accepted values
 */
Bawolff.leadGen.leadToPosition = function (numb) {
    var labels = ["En haut", "Milieu", "Milieu 2", "En bas"];
    if (numb === 'e') {
        return labels[0];
    }
    for (var position = 1; position <= labels.length; position++) {
        //double equal sign intentional to convert from string.
        if (numb == position) {
            return labels[position - 1];
        }
    }
    throw new Error("Chiffre de l\'article principal invalide (" + numb +") trasmis à Bawolff.leadGen.leadToPosition");
};