MediaWiki:ScmcScanner.js: различия между версиями
Страница интерфейса MediaWiki
Дополнительные действия
Defer (обсуждение | вклад) Нет описания правки |
Defer (обсуждение | вклад) Нет описания правки |
||
| Строка 62: | Строка 62: | ||
function uniqueSorted(list) { | function uniqueSorted(list) { | ||
var seen = {}; | var seen = {}; | ||
list.forEach(function (item) { | list.forEach(function (item) { | ||
var title = normalizeTitle(item); | var title = normalizeTitle(item); | ||
| Строка 74: | Строка 75: | ||
return a.localeCompare(b, 'ru'); | return a.localeCompare(b, 'ru'); | ||
}); | }); | ||
} | |||
function chunkArray(items, size) { | |||
var chunks = []; | |||
for (var i = 0; i < items.length; i += size) { | |||
chunks.push(items.slice(i, i + size)); | |||
} | |||
return chunks; | |||
} | |||
function extractAttr(tag, names) { | |||
for (var i = 0; i < names.length; i++) { | |||
var name = names[i]; | |||
var re = new RegExp(name + '\\s*=\\s*(["\\\'])(.*?)\\1', 'i'); | |||
var match = tag.match(re); | |||
if (match) { | |||
return match[2]; | |||
} | |||
} | |||
return ''; | |||
} | |||
function isStopCatalogRow(tag) { | |||
var scanValue = normalizeTitle(extractAttr(tag, ['data-scan', 'data_scan'])).toLowerCase(); | |||
var classValue = ' ' + normalizeTitle(extractAttr(tag, ['class'])).toLowerCase() + ' '; | |||
return scanValue === 'stop' || | |||
classValue.indexOf(' scmc-scan-stop ') !== -1 || | |||
classValue.indexOf(' scan-stop ') !== -1 || | |||
classValue.indexOf(' stop ') !== -1; | |||
} | |||
function getApi() { | |||
return new mw.Api(); | |||
} | |||
function resolveTitles(api, titles) { | |||
var unique = uniqueSorted(titles); | |||
var chunks = chunkArray(unique, 45); | |||
var finalResult = {}; | |||
var allRedirects = {}; | |||
function followRedirect(title, redirectMap) { | |||
var current = normalizeTitle(title); | |||
var guard = 0; | |||
while (redirectMap[titleKey(current)] && guard < 10) { | |||
current = redirectMap[titleKey(current)]; | |||
guard++; | |||
} | |||
return current; | |||
} | |||
function processChunk(chunk) { | |||
if (!chunk.length) return $.Deferred().resolve().promise(); | |||
return api.get({ | |||
action: 'query', | |||
titles: chunk.join('|'), | |||
redirects: 1, | |||
formatversion: 2 | |||
}).then(function (data) { | |||
var redirectMap = {}; | |||
var pagesByKey = {}; | |||
if (data.query && data.query.redirects) { | |||
data.query.redirects.forEach(function (redirect) { | |||
var from = normalizeTitle(redirect.from); | |||
var to = normalizeTitle(redirect.to); | |||
redirectMap[titleKey(from)] = to; | |||
allRedirects[from] = to; | |||
}); | |||
} | |||
if (data.query && data.query.pages) { | |||
data.query.pages.forEach(function (page) { | |||
var pageTitle = normalizeTitle(page.title); | |||
pagesByKey[titleKey(pageTitle)] = { | |||
title: pageTitle, | |||
exists: page.missing === undefined | |||
}; | |||
}); | |||
} | |||
chunk.forEach(function (requested) { | |||
var requestedTitle = normalizeTitle(requested); | |||
var finalTitle = followRedirect(requestedTitle, redirectMap); | |||
var page = pagesByKey[titleKey(finalTitle)]; | |||
finalResult[titleKey(requestedTitle)] = { | |||
requestedTitle: requestedTitle, | |||
finalTitle: page ? page.title : finalTitle, | |||
exists: page ? page.exists : null | |||
}; | |||
}); | |||
}); | |||
} | |||
var chain = $.Deferred().resolve().promise(); | |||
chunks.forEach(function (chunk) { | |||
chain = chain.then(function () { | |||
return processChunk(chunk); | |||
}); | |||
}); | |||
return chain.then(function () { | |||
return { | |||
items: finalResult, | |||
redirects: allRedirects | |||
}; | |||
}); | |||
} | |||
function loadCatalog(api, catalogTitle) { | |||
return api.get({ | |||
action: 'parse', | |||
page: catalogTitle, | |||
prop: 'wikitext', | |||
formatversion: 2 | |||
}).then(function (data) { | |||
var wikitext = ''; | |||
if (data.parse && typeof data.parse.wikitext === 'string') { | |||
wikitext = data.parse.wikitext; | |||
} else if (data.parse && data.parse.wikitext && data.parse.wikitext['*']) { | |||
wikitext = data.parse.wikitext['*']; | |||
} | |||
var rawPages = []; | |||
var rawStopPages = {}; | |||
var rowRe = /<div\b[^>]*scmc-catalog-row[^>]*>/gi; | |||
var match; | |||
while ((match = rowRe.exec(wikitext)) !== null) { | |||
var tag = match[0]; | |||
var page = normalizeTitle(extractAttr(tag, ['data-page', 'data_page'])); | |||
if (!page) continue; | |||
rawPages.push(page); | |||
if (isStopCatalogRow(tag)) { | |||
rawStopPages[titleKey(page)] = true; | |||
} | |||
} | |||
return resolveTitles(api, rawPages).then(function (resolved) { | |||
var knownPages = {}; | |||
var stopPages = {}; | |||
var redirects = resolved.redirects; | |||
rawPages.forEach(function (rawPage) { | |||
var rawKey = titleKey(rawPage); | |||
var resolvedItem = resolved.items[rawKey]; | |||
var finalTitle = resolvedItem ? resolvedItem.finalTitle : rawPage; | |||
var finalKey = titleKey(finalTitle); | |||
knownPages[rawKey] = rawPage; | |||
knownPages[finalKey] = finalTitle; | |||
if (rawStopPages[rawKey]) { | |||
stopPages[rawKey] = true; | |||
stopPages[finalKey] = true; | |||
} | |||
}); | |||
return { | |||
knownPages: knownPages, | |||
stopPages: stopPages, | |||
redirects: redirects, | |||
count: Object.keys(knownPages).length | |||
}; | |||
}); | |||
}); | |||
} | } | ||
function buildScanner(container) { | function buildScanner(container) { | ||
var rootTitle = normalizeTitle(container.getAttribute('data-root') || 'Marine_Corps'); | var rootTitle = normalizeTitle(container.getAttribute('data-root') || 'Marine_Corps'); | ||
var catalogTitle = normalizeTitle(container.getAttribute('data-catalog') || 'MC:Страницы'); | |||
var maxDepth = parseInt(container.getAttribute('data-depth') || '5', 10); | var maxDepth = parseInt(container.getAttribute('data-depth') || '5', 10); | ||
var maxPages = parseInt(container.getAttribute('data-max-pages') || ' | var maxPages = parseInt(container.getAttribute('data-max-pages') || '300', 10); | ||
var extraExcludedTitles = splitDataList(container.getAttribute('data-exclude')); | var extraExcludedTitles = splitDataList(container.getAttribute('data-exclude')); | ||
| Строка 107: | Строка 291: | ||
if (isExcludedTitle(link.title)) return false; | if (isExcludedTitle(link.title)) return false; | ||
return true; | return true; | ||
} | } | ||
| Строка 207: | Строка 387: | ||
'div', | 'div', | ||
'scmc-scan-subtitle', | 'scmc-scan-subtitle', | ||
'Старт: ' + rootTitle + ' · глубина: ' + maxDepth + ' · лимит страниц: ' + maxPages | 'Старт: ' + rootTitle + ' · каталог: ' + catalogTitle + ' · глубина: ' + maxDepth + ' · лимит страниц: ' + maxPages | ||
); | ); | ||
| Строка 226: | Строка 406: | ||
var treeBox = makeEl('div', 'scmc-scan-box'); | var treeBox = makeEl('div', 'scmc-scan-box'); | ||
var listBox = makeEl('div', 'scmc-scan-box'); | var listBox = makeEl('div', 'scmc-scan-box'); | ||
var candidatesBox = makeEl('div', 'scmc-scan-box scmc-scan-box-wide'); | |||
var redirectBox = makeEl('div', 'scmc-scan-box scmc-scan-box-wide'); | var redirectBox = makeEl('div', 'scmc-scan-box scmc-scan-box-wide'); | ||
resultWrap.appendChild(treeBox); | resultWrap.appendChild(treeBox); | ||
resultWrap.appendChild(listBox); | resultWrap.appendChild(listBox); | ||
resultWrap.appendChild(candidatesBox); | |||
resultWrap.appendChild(redirectBox); | resultWrap.appendChild(redirectBox); | ||
| Строка 247: | Строка 429: | ||
treeBox.innerHTML = ''; | treeBox.innerHTML = ''; | ||
listBox.innerHTML = ''; | listBox.innerHTML = ''; | ||
candidatesBox.innerHTML = ''; | |||
redirectBox.innerHTML = ''; | redirectBox.innerHTML = ''; | ||
statsBox.innerHTML = ''; | statsBox.innerHTML = ''; | ||
| Строка 254: | Строка 437: | ||
function renderTree(root, childrenMap, pageInfo, firstParent, alsoLinkedFrom) { | function renderTree(root, childrenMap, pageInfo, firstParent, alsoLinkedFrom) { | ||
treeBox.innerHTML = ''; | treeBox.innerHTML = ''; | ||
treeBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Дерево известных страниц')); | |||
treeBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Дерево | |||
function makeNode(title, path) { | function makeNode(title, path) { | ||
| Строка 275: | Строка 457: | ||
if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) { | if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) { | ||
line.appendChild(makeEl('span', 'scmc-scan-redirect-mini', info.requestedTitle + ' → ' + info.finalTitle)); | line.appendChild(makeEl('span', 'scmc-scan-redirect-mini', info.requestedTitle + ' → ' + info.finalTitle)); | ||
} | |||
if (info.candidate) { | |||
line.appendChild(makeEl('span', 'scmc-scan-candidate', 'кандидат')); | |||
} | |||
if (info.catalogStop) { | |||
line.appendChild(makeEl('span', 'scmc-scan-stopped', 'остановлено каталогом')); | |||
} | |||
if (info.depthStop) { | |||
line.appendChild(makeEl('span', 'scmc-scan-stopped', 'предел глубины')); | |||
} | } | ||
| Строка 332: | Строка 526: | ||
} | } | ||
function | function renderKnownList(pageInfo) { | ||
listBox.innerHTML = ''; | listBox.innerHTML = ''; | ||
listBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Известные страницы из каталога')); | |||
listBox.appendChild(makeEl('div', 'scmc-scan- | var pages = Object.keys(pageInfo) | ||
.map(function (key) { | |||
return pageInfo[key]; | |||
}) | |||
.filter(function (info) { | |||
return !info.candidate; | |||
}) | |||
.sort(function (a, b) { | |||
if (a.depth !== b.depth) return a.depth - b.depth; | |||
return a.finalTitle.localeCompare(b.finalTitle, 'ru'); | |||
}); | |||
if (!pages.length) { | |||
listBox.appendChild(makeEl('div', 'scmc-scan-empty', 'Пока пусто.')); | |||
return; | |||
} | |||
var list = makeEl('ol', 'scmc-scan-page-list'); | var list = makeEl('ol', 'scmc-scan-page-list'); | ||
var pages = Object.keys( | pages.forEach(function (info) { | ||
var li = document.createElement('li'); | |||
var link = document.createElement('a'); | |||
link.href = titleToUrl(info.finalTitle); | |||
link.textContent = info.finalTitle; | |||
li.appendChild(link); | |||
li.appendChild(makeEl('span', 'scmc-scan-depth', 'ур. ' + info.depth)); | |||
if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) { | |||
li.appendChild(makeEl('span', 'scmc-scan-redirect-mini', 'найдено как: ' + info.requestedTitle)); | |||
} | |||
if (info.catalogStop) { | |||
li.appendChild(makeEl('span', 'scmc-scan-stopped', 'остановлено каталогом')); | |||
} | |||
if (info.depthStop) { | |||
li.appendChild(makeEl('span', 'scmc-scan-stopped', 'предел глубины')); | |||
} | |||
list.appendChild(li); | |||
}); | |||
listBox.appendChild(list); | |||
} | |||
function renderCandidates(candidates) { | |||
candidatesBox.innerHTML = ''; | |||
candidatesBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Кандидаты: ссылки не из каталога')); | |||
var pages = Object.keys(candidates) | |||
.map(function (key) { | .map(function (key) { | ||
return | return candidates[key]; | ||
}) | }) | ||
.sort(function (a, b) { | .sort(function (a, b) { | ||
| Строка 347: | Строка 589: | ||
return a.finalTitle.localeCompare(b.finalTitle, 'ru'); | return a.finalTitle.localeCompare(b.finalTitle, 'ru'); | ||
}); | }); | ||
if (!pages.length) { | |||
candidatesBox.appendChild(makeEl('div', 'scmc-scan-empty', 'Новых кандидатов не найдено.')); | |||
return; | |||
} | |||
var list = makeEl('ol', 'scmc-scan-page-list'); | |||
pages.forEach(function (info) { | pages.forEach(function (info) { | ||
| Строка 357: | Строка 606: | ||
li.appendChild(link); | li.appendChild(link); | ||
li.appendChild(makeEl('span', 'scmc-scan-depth', 'ур. ' + info.depth)); | li.appendChild(makeEl('span', 'scmc-scan-depth', 'ур. ' + info.depth)); | ||
li.appendChild(makeEl('span', 'scmc-scan-candidate', 'не сканировалась')); | |||
if (info.from) { | |||
li.appendChild(makeEl('span', 'scmc-scan-ref', 'найдена из: ' + info.from)); | |||
} | |||
if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) { | if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) { | ||
li.appendChild(makeEl('span', 'scmc-scan-redirect-mini', ' | li.appendChild(makeEl('span', 'scmc-scan-redirect-mini', info.requestedTitle + ' → ' + info.finalTitle)); | ||
} | } | ||
| Строка 369: | Строка 623: | ||
}); | }); | ||
candidatesBox.appendChild(list); | |||
} | } | ||
function renderRedirects(redirectsFound) { | function renderRedirects(redirectsFound) { | ||
redirectBox.innerHTML = ''; | redirectBox.innerHTML = ''; | ||
redirectBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Редиректы')); | redirectBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Редиректы')); | ||
| Строка 390: | Строка 643: | ||
keys.forEach(function (from) { | keys.forEach(function (from) { | ||
var to = redirectsFound[from]; | var to = redirectsFound[from]; | ||
var li = document.createElement('li'); | var li = document.createElement('li'); | ||
| Строка 416: | Строка 668: | ||
var api = getApi(); | var api = getApi(); | ||
scanBtn.disabled = true; | |||
clearBtn.disabled = true; | |||
setStatus('Читаю каталог: ' + catalogTitle, 'scanning'); | |||
loadCatalog(api, catalogTitle).then(function (catalog) { | |||
var knownPages = catalog.knownPages; | |||
var stopPages = catalog.stopPages; | |||
var redirectsFound = Object.assign({}, catalog.redirects); | |||
var queue = [{ | |||
requestedTitle: rootTitle, | |||
depth: 0, | |||
parent: null | |||
}]; | |||
var scanned = {}; | |||
var queued = {}; | |||
var pageInfo = {}; | |||
var candidates = {}; | |||
var childrenMap = {}; | |||
var firstParent = {}; | |||
var alsoLinkedFrom = {}; | |||
var stoppedByLimit = false; | |||
queued[titleKey(rootTitle)] = true; | |||
if ( | function addRedirects(redirects) { | ||
redirects.forEach(function (redirect) { | |||
if (redirect.from && redirect.to) { | |||
redirectsFound[redirect.from] = redirect.to; | |||
} | |||
}); | |||
} | } | ||
if (alsoLinkedFrom[childKey].indexOf(parentTitle) === -1) { | function addAlsoLinked(childTitle, parentTitle) { | ||
var childKey = titleKey(childTitle); | |||
if (!alsoLinkedFrom[childKey]) { | |||
alsoLinkedFrom[childKey] = []; | |||
} | |||
if (alsoLinkedFrom[childKey].indexOf(parentTitle) === -1) { | |||
alsoLinkedFrom[childKey].push(parentTitle); | |||
} | |||
} | } | ||
function upsertPageInfo(finalTitle, requestedTitle, depth, exists, options) { | |||
var finalKey = titleKey(finalTitle); | |||
var current = pageInfo[finalKey]; | |||
if (!current) { | |||
current = { | |||
requestedTitle: requestedTitle || finalTitle, | |||
finalTitle: finalTitle, | |||
depth: depth, | |||
exists: exists | |||
}; | |||
pageInfo[finalKey] = current; | |||
} | |||
if (depth < current.depth) { | |||
current.depth = depth; | |||
} | |||
if (requestedTitle && titleKey(requestedTitle) !== titleKey(finalTitle)) { | |||
current.requestedTitle = requestedTitle; | |||
} | |||
if (exists === false) { | |||
current.exists = false; | |||
} | |||
if (options) { | |||
Object.keys(options).forEach(function (key) { | |||
current[key] = options[key]; | |||
}); | |||
} | |||
return current; | |||
} | |||
function step() { | |||
if (!queue.length) { | |||
finish(); | |||
return; | |||
} | |||
if ( | if (Object.keys(scanned).length >= maxPages) { | ||
stoppedByLimit = true; | |||
finish(); | |||
return; | |||
} | } | ||
var item = queue.shift(); | |||
var requestedTitle = normalizeTitle(item.requestedTitle); | |||
var requestedKey = titleKey(requestedTitle); | |||
if (scanned[requestedKey]) { | |||
step(); | |||
return; | |||
} | } | ||
scanned[requestedKey] = true; | |||
setStatus('Сканирую: ' + requestedTitle + ' · уровень ' + item.depth, 'scanning'); | |||
getPageLinks(api, requestedTitle).then(function (data) { | |||
addRedirects(data.redirects); | |||
var finalTitle = normalizeTitle(data.finalTitle || requestedTitle); | |||
var finalKey = titleKey(finalTitle); | |||
var isRoot = item.depth === 0; | |||
var isKnown = !!knownPages[finalKey] || !!knownPages[requestedKey] || isRoot; | |||
var isCatalogStop = !!stopPages[finalKey] || !!stopPages[requestedKey]; | |||
upsertPageInfo(finalTitle, requestedTitle, item.depth, data.exists, { | |||
catalogStop: isCatalogStop | |||
}); | |||
if (!isKnown) { | |||
candidates[finalKey] = { | |||
requestedTitle: requestedTitle, | |||
finalTitle: finalTitle, | |||
depth: item.depth, | |||
exists: data.exists, | |||
from: item.parent || '', | |||
candidate: true | |||
}; | |||
upsertPageInfo(finalTitle, requestedTitle, item.depth, data.exists, { | |||
candidate: true | |||
}); | |||
setTimeout(step, 80); | |||
return; | |||
} | } | ||
if ( | if (isCatalogStop) { | ||
childrenMap[finalKey] = []; | |||
setTimeout(step, 80); | |||
return; | |||
} | } | ||
if (item.depth | if (item.depth >= maxDepth) { | ||
pageInfo[finalKey].depthStop = true; | |||
childrenMap[finalKey] = []; | |||
setTimeout(step, 80); | |||
return; | |||
} | |||
resolveTitles(api, data.links).then(function (resolvedLinks) { | |||
Object.keys(resolvedLinks.redirects).forEach(function (from) { | |||
redirectsFound[from] = resolvedLinks.redirects[from]; | |||
}); | }); | ||
} | |||
var children = []; | |||
data.links.forEach(function (originalLinkTitle) { | |||
var originalKey = titleKey(originalLinkTitle); | |||
var resolved = resolvedLinks.items[originalKey]; | |||
var childRequested = resolved ? resolved.requestedTitle : originalLinkTitle; | |||
var childFinal = resolved ? resolved.finalTitle : originalLinkTitle; | |||
var childExists = resolved ? resolved.exists : null; | |||
var childKey = titleKey(childFinal); | |||
if (isExcludedTitle(childRequested) || isExcludedTitle(childFinal)) { | |||
return; | |||
} | |||
var childKnown = !!knownPages[childKey] || !!knownPages[originalKey]; | |||
var childStop = !!stopPages[childKey] || !!stopPages[originalKey]; | |||
children.push(childFinal); | |||
if (!firstParent[childKey]) { | |||
firstParent[childKey] = finalTitle; | |||
} else if (titleKey(firstParent[childKey]) !== finalKey) { | |||
addAlsoLinked(childFinal, finalTitle); | |||
} | |||
if (childKnown) { | |||
upsertPageInfo(childFinal, childRequested, item.depth + 1, childExists, { | |||
catalogStop: childStop | |||
}); | |||
if (!childStop && !queued[childKey]) { | |||
queued[childKey] = true; | |||
queue.push({ | |||
requestedTitle: childFinal, | |||
depth: item.depth + 1, | |||
parent: finalTitle | |||
}); | |||
} | |||
} else { | |||
candidates[childKey] = { | |||
requestedTitle: childRequested, | |||
finalTitle: childFinal, | |||
depth: item.depth + 1, | |||
exists: childExists, | |||
from: finalTitle, | |||
candidate: true | |||
}; | |||
upsertPageInfo(childFinal, childRequested, item.depth + 1, childExists, { | |||
candidate: true | |||
}); | |||
} | |||
}); | |||
childrenMap[finalKey] = uniqueSorted(children); | |||
statsBox.textContent = | |||
'Каталог: ' + Object.keys(knownPages).length + | |||
' · Просканировано: ' + Object.keys(scanned).length + | |||
' · В очереди: ' + queue.length + | |||
' · Известных найдено: ' + Object.keys(pageInfo).length + | |||
' · Кандидатов: ' + Object.keys(candidates).length + | |||
' · Редиректов: ' + Object.keys(redirectsFound).length; | |||
setTimeout(step, 100); | |||
}); | |||
}).catch(function (error) { | |||
console.error(error); | |||
setStatus('Ошибка при сканировании: ' + requestedTitle, 'error'); | |||
scanBtn.disabled = false; | |||
clearBtn.disabled = false; | |||
}); | }); | ||
} | |||
function finish() { | |||
var rootFinal = rootTitle; | |||
Object.keys(pageInfo).some(function (key) { | |||
var info = pageInfo[key]; | |||
if (titleKey(info.requestedTitle) === titleKey(rootTitle) || titleKey(info.finalTitle) === titleKey(rootTitle)) { | |||
rootFinal = info.finalTitle; | |||
return true; | |||
} | |||
return false; | |||
}); | |||
renderTree(rootFinal, childrenMap, pageInfo, firstParent, alsoLinkedFrom); | |||
renderKnownList(pageInfo); | |||
renderCandidates(candidates); | |||
renderRedirects(redirectsFound); | |||
var finalText = | |||
'Готово. Каталог прочитан. Просканировано: ' + Object.keys(scanned).length + | |||
'. Кандидатов: ' + Object.keys(candidates).length + | |||
'. Редиректов: ' + Object.keys(redirectsFound).length + '.'; | |||
if (stoppedByLimit) { | |||
finalText += ' Остановлено по лимиту страниц.'; | |||
if ( | |||
} | } | ||
statsBox.textContent = finalText; | |||
setStatus('Готово', 'done'); | |||
scanBtn.disabled = false; | |||
clearBtn.disabled = false; | |||
} | |||
step(); | |||
}).catch(function (error) { | |||
console.error(error); | |||
setStatus('Не удалось прочитать каталог: ' + catalogTitle, 'error'); | |||
setStatus(' | |||
scanBtn.disabled = false; | scanBtn.disabled = false; | ||
clearBtn.disabled = false; | clearBtn.disabled = false; | ||
} | }); | ||
} | } | ||
Версия от 09:10, 15 июня 2026
(function () {
var scannerRoot = document.querySelector('.scmc-link-scanner');
if (!scannerRoot) return;
/*
 * Namespace-style title prefixes, in Russian and English spellings, that the
 * scanner refuses to follow. isExcludedTitle() tests them with
 * indexOf(prefix) === 0, so the comparison is case-sensitive and anchored at
 * the start of the title.
 */
var EXCLUDED_PREFIXES = [
'Файл:',
'File:',
'Категория:',
'Category:',
'Шаблон:',
'Template:',
'Участник:',
'User:',
'Обсуждение:',
'Talk:',
'Служебная:',
'Special:',
'MediaWiki:',
'Модуль:',
'Module:',
'Справка:',
'Help:'
];
/**
 * Cleans a page title: underscores become spaces, every run of whitespace
 * collapses to a single space, and the ends are trimmed.
 *
 * @param {*} title Raw title; any falsy value is treated as an empty string.
 * @return {string} The cleaned title (may be empty).
 */
function normalizeTitle(title) {
    var text = title ? String(title) : '';
    // Underscores and whitespace are interchangeable here, so a single pass
    // over mixed runs yields the same result as two separate replacements.
    return text.replace(/[_\s]+/g, ' ').trim();
}
/**
 * Builds the case-insensitive lookup key for a title: the normalized title
 * in lower case.
 *
 * @param {*} title Raw or already normalized title.
 * @return {string} Lower-cased normalized title.
 */
function titleKey(title) {
    var cleaned = normalizeTitle(title);
    return cleaned.toLowerCase();
}
/**
 * Returns the URL for a page title by delegating to mw.util.getUrl().
 *
 * @param {string} title Page title.
 * @return {*} Whatever mw.util.getUrl() returns for the title.
 */
function titleToUrl(title) {
    var url = mw.util.getUrl(title);
    return url;
}
/**
 * Creates a DOM element, optionally with a class name and text content.
 *
 * @param {string} tag Tag name passed to document.createElement().
 * @param {string} [className] Class attribute; skipped when falsy.
 * @param {*} [text] Text content; skipped only when undefined, so an empty
 *   string or null is still assigned.
 * @return {Element} The new element.
 */
function makeEl(tag, className, text) {
    var node = document.createElement(tag);
    if (className) {
        node.className = className;
    }
    if (typeof text !== 'undefined') {
        node.textContent = text;
    }
    return node;
}
/**
 * Creates a <button type="button"> with the given label.
 *
 * @param {string} text Button label.
 * @param {string} [className] Class attribute; 'scmc-scan-btn' is used when
 *   this is falsy.
 * @return {Element} The new button element.
 */
function makeButton(text, className) {
    var button = document.createElement('button');
    button.textContent = text;
    button.className = className ? className : 'scmc-scan-btn';
    button.type = 'button';
    return button;
}
/**
 * Splits a pipe-separated attribute value into normalized titles, dropping
 * entries that are empty after normalization.
 *
 * @param {*} value Raw attribute value; falsy values yield an empty list.
 * @return {string[]} Normalized, non-empty titles in their original order.
 */
function splitDataList(value) {
    var parts = String(value || '').split('|');
    var result = [];
    for (var i = 0; i < parts.length; i++) {
        var entry = normalizeTitle(parts[i]);
        if (entry) {
            result.push(entry);
        }
    }
    return result;
}
/**
 * Normalizes a list of titles, removes duplicates case-insensitively and
 * returns the survivors sorted with Russian collation.
 *
 * When several entries share the same key, the spelling of the last one in
 * the input wins. Entries that normalize to an empty string are dropped.
 *
 * @param {Array} list Raw titles.
 * @return {string[]} Unique normalized titles, sorted.
 */
function uniqueSorted(list) {
    var byKey = list.reduce(function (acc, item) {
        var title = normalizeTitle(item);
        if (title) {
            acc[titleKey(title)] = title;
        }
        return acc;
    }, {});

    var titles = [];
    Object.keys(byKey).forEach(function (key) {
        titles.push(byKey[key]);
    });
    titles.sort(function (left, right) {
        return left.localeCompare(right, 'ru');
    });
    return titles;
}
/**
 * Splits an array into consecutive slices of at most `size` elements.
 *
 * @param {Array} items Source array; it is not modified.
 * @param {number} size Maximum slice length; must be greater than zero.
 * @return {Array[]} The slices in order; empty when `items` is empty.
 * @throws {RangeError} When `size` is zero, negative or NaN. Such a value
 *   would otherwise never advance the loop index (an endless loop) or
 *   produce a single bogus empty chunk.
 */
function chunkArray(items, size) {
    // Written as a negated comparison so that NaN is rejected as well.
    if (!(size > 0)) {
        throw new RangeError('chunkArray: size must be a positive number, got ' + size);
    }
    var chunks = [];
    for (var i = 0; i < items.length; i += size) {
        chunks.push(items.slice(i, i + size));
    }
    return chunks;
}
/**
 * Reads the value of the first attribute from `names` that is present in a
 * raw HTML opening tag.
 *
 * The name must start at an attribute boundary: it may not be preceded by a
 * word character or a hyphen. Without that rule, asking for `class` would
 * also match the tail of `data-class="…"` or `subclass="…"`.
 *
 * @param {string} tag Raw tag text, e.g. '<div class="x" data-page="Y">'.
 * @param {string[]} names Attribute names to try, in priority order;
 *   matching is case-insensitive.
 * @return {string} The quoted attribute value, without the quotes; '' when
 *   no listed attribute is found. Entities are not decoded.
 */
function extractAttr(tag, names) {
    for (var i = 0; i < names.length; i++) {
        // Escape regex metacharacters so the name is always matched literally.
        var escapedName = String(names[i]).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
        var re = new RegExp(
            '(?:^|[^\\w-])' + escapedName + '\\s*=\\s*(["\'])(.*?)\\1',
            'i'
        );
        var match = tag.match(re);
        if (match) {
            return match[2];
        }
    }
    return '';
}
/**
 * Tells whether a catalog row tag is marked as a scan stop.
 *
 * A row counts as a stop when its data-scan / data_scan attribute equals
 * "stop", or when its class list contains scmc-scan-stop, scan-stop or stop.
 * Both checks are case-insensitive. Values go through normalizeTitle(), so
 * underscores in them count as spaces.
 *
 * @param {string} tag Raw opening tag of the catalog row.
 * @return {boolean} True when the row is a stop marker.
 */
function isStopCatalogRow(tag) {
    var scanAttr = extractAttr(tag, ['data-scan', 'data_scan']);
    if (normalizeTitle(scanAttr).toLowerCase() === 'stop') {
        return true;
    }

    // Padding with spaces lets each marker be matched as a whole class token.
    var paddedClasses = ' ' + normalizeTitle(extractAttr(tag, ['class'])).toLowerCase() + ' ';
    var markers = [' scmc-scan-stop ', ' scan-stop ', ' stop '];
    return markers.some(function (marker) {
        return paddedClasses.indexOf(marker) !== -1;
    });
}
/**
 * Creates a fresh mw.Api client.
 *
 * @return {mw.Api} A new instance on every call.
 */
function getApi() {
    var client = new mw.Api();
    return client;
}
/**
 * Resolves a list of titles through the MediaWiki query API, following
 * redirects, and reports for each requested title where it ends up and
 * whether that page exists.
 *
 * Titles are de-duplicated and queried sequentially in batches of 45.
 *
 * @param {Object} api Client exposing get(params) that returns a thenable
 *   (an mw.Api instance in this file).
 * @param {Array} titles Titles to resolve.
 * @return {Object} Thenable resolving to { items, redirects }:
 *   - items: titleKey(requested) -> { requestedTitle, finalTitle, exists },
 *     where exists is true/false, or null when the API returned no page
 *     entry for the final title;
 *   - redirects: normalized "from" title -> normalized "to" title for every
 *     redirect the API reported.
 */
function resolveTitles(api, titles) {
    var unique = uniqueSorted(titles);
    var chunks = chunkArray(unique, 45);
    var finalResult = {};
    var allRedirects = {};

    // Follows redirectMap from `title`; the hop limit guards against cycles.
    function followRedirect(title, redirectMap) {
        var current = normalizeTitle(title);
        var guard = 0;
        while (redirectMap[titleKey(current)] && guard < 10) {
            current = redirectMap[titleKey(current)];
            guard++;
        }
        return current;
    }

    function processChunk(chunk) {
        if (!chunk.length) return $.Deferred().resolve().promise();
        return api.get({
            action: 'query',
            titles: chunk.join('|'),
            redirects: 1,
            formatversion: 2
        }).then(function (data) {
            // Prototype-less maps: with a plain {} a title such as
            // "Constructor" would hit Object.prototype.constructor and be
            // treated as an existing map entry.
            var normalizedMap = Object.create(null);
            var redirectMap = Object.create(null);
            var pagesByKey = Object.create(null);

            // The API may rewrite a requested title before resolving it
            // (for example a namespace alias). Redirects and pages are then
            // reported under the rewritten title, so this mapping is needed
            // to get from what was asked to what was answered.
            if (data.query && data.query.normalized) {
                data.query.normalized.forEach(function (entry) {
                    normalizedMap[titleKey(entry.from)] = normalizeTitle(entry.to);
                });
            }
            if (data.query && data.query.redirects) {
                data.query.redirects.forEach(function (redirect) {
                    var from = normalizeTitle(redirect.from);
                    var to = normalizeTitle(redirect.to);
                    redirectMap[titleKey(from)] = to;
                    allRedirects[from] = to;
                });
            }
            if (data.query && data.query.pages) {
                data.query.pages.forEach(function (page) {
                    var pageTitle = normalizeTitle(page.title);
                    pagesByKey[titleKey(pageTitle)] = {
                        title: pageTitle,
                        // Invalid titles carry `invalid` rather than
                        // `missing`; neither kind exists as a page.
                        exists: page.missing === undefined && page.invalid === undefined
                    };
                });
            }
            chunk.forEach(function (requested) {
                var requestedTitle = normalizeTitle(requested);
                var startTitle = normalizedMap[titleKey(requestedTitle)] || requestedTitle;
                var finalTitle = followRedirect(startTitle, redirectMap);
                var page = pagesByKey[titleKey(finalTitle)];
                finalResult[titleKey(requestedTitle)] = {
                    requestedTitle: requestedTitle,
                    finalTitle: page ? page.title : finalTitle,
                    exists: page ? page.exists : null
                };
            });
        });
    }

    // Run the batches one after another rather than in parallel.
    var chain = $.Deferred().resolve().promise();
    chunks.forEach(function (chunk) {
        chain = chain.then(function () {
            return processChunk(chunk);
        });
    });
    return chain.then(function () {
        return {
            items: finalResult,
            redirects: allRedirects
        };
    });
}
/**
 * Loads the catalog page, extracts every catalog row from its wikitext and
 * resolves the listed titles through redirects.
 *
 * A row is any <div ...> opening tag whose text contains "scmc-catalog-row"
 * and that carries a non-empty data-page / data_page attribute. Rows for
 * which isStopCatalogRow() is true are recorded as stop pages.
 *
 * @param {Object} api Client exposing get(params) that returns a thenable.
 * @param {string} catalogTitle Title of the catalog page.
 * @return {Object} Thenable resolving to:
 *   - knownPages: titleKey -> title, for both the title as written in the
 *     catalog and the title it resolves to;
 *   - stopPages: titleKey -> true, again for both spellings;
 *   - redirects: the redirect map returned by resolveTitles();
 *   - count: number of keys in knownPages (a redirected page contributes
 *     two keys).
 *   knownPages and stopPages have no prototype, so a lookup such as
 *   knownPages['constructor'] is only truthy for a page that is really
 *   listed.
 */
function loadCatalog(api, catalogTitle) {
    return api.get({
        action: 'parse',
        page: catalogTitle,
        prop: 'wikitext',
        formatversion: 2
    }).then(function (data) {
        // formatversion=2 returns the wikitext as a string; the '*' form is
        // accepted as a fallback.
        var wikitext = '';
        if (data.parse && typeof data.parse.wikitext === 'string') {
            wikitext = data.parse.wikitext;
        } else if (data.parse && data.parse.wikitext && data.parse.wikitext['*']) {
            wikitext = data.parse.wikitext['*'];
        }

        var rawPages = [];
        var rawStopPages = Object.create(null);
        var rowRe = /<div\b[^>]*scmc-catalog-row[^>]*>/gi;
        var match;
        while ((match = rowRe.exec(wikitext)) !== null) {
            var tag = match[0];
            var page = normalizeTitle(extractAttr(tag, ['data-page', 'data_page']));
            if (!page) continue;
            rawPages.push(page);
            if (isStopCatalogRow(tag)) {
                rawStopPages[titleKey(page)] = true;
            }
        }

        return resolveTitles(api, rawPages).then(function (resolved) {
            // Callers test membership with plain property reads, so these
            // maps must not inherit keys from Object.prototype.
            var knownPages = Object.create(null);
            var stopPages = Object.create(null);
            var redirects = resolved.redirects;
            rawPages.forEach(function (rawPage) {
                var rawKey = titleKey(rawPage);
                var resolvedItem = resolved.items[rawKey];
                var finalTitle = resolvedItem ? resolvedItem.finalTitle : rawPage;
                var finalKey = titleKey(finalTitle);
                // Register both spellings so a link to either one is known.
                knownPages[rawKey] = rawPage;
                knownPages[finalKey] = finalTitle;
                if (rawStopPages[rawKey]) {
                    stopPages[rawKey] = true;
                    stopPages[finalKey] = true;
                }
            });
            return {
                knownPages: knownPages,
                stopPages: stopPages,
                redirects: redirects,
                count: Object.keys(knownPages).length
            };
        });
    });
}
function buildScanner(container) {
// Scanner options are read from data-* attributes on the container, each
// with a built-in default.
var rootTitle = normalizeTitle(container.getAttribute('data-root') || 'Marine_Corps');
var catalogTitle = normalizeTitle(container.getAttribute('data-catalog') || 'MC:Страницы');
// parseInt() yields NaN for a non-numeric attribute value. Comparisons such
// as `depth >= maxDepth` are always false against NaN, which would silently
// switch the limit off, so fall back to the default instead.
var maxDepth = parseInt(container.getAttribute('data-depth') || '5', 10);
if (isNaN(maxDepth)) {
    maxDepth = 5;
}
var maxPages = parseInt(container.getAttribute('data-max-pages') || '300', 10);
if (isNaN(maxPages)) {
    maxPages = 300;
}
// Extra titles to skip, given as a pipe-separated list.
var extraExcludedTitles = splitDataList(container.getAttribute('data-exclude'));
/**
 * Decides whether a title must be skipped by the scanner.
 *
 * A title is excluded when, after its #fragment is removed, it is empty,
 * equals one of the data-exclude titles (compared case-insensitively), or
 * starts with one of EXCLUDED_PREFIXES (compared case-sensitively).
 *
 * @param {*} title Raw title, optionally with a #fragment.
 * @return {boolean} True when the title must not be listed or followed.
 */
function isExcludedTitle(title) {
    var clean = normalizeTitle(title);
    var hashIndex = clean.indexOf('#');
    if (hashIndex !== -1) {
        clean = normalizeTitle(clean.slice(0, hashIndex));
    }
    // Checked after the fragment is stripped, so a bare "#section" link,
    // which names no page at all, is rejected too.
    if (!clean) return true;

    var cleanKey = titleKey(clean);
    var isListed = extraExcludedTitles.some(function (excluded) {
        return titleKey(excluded) === cleanKey;
    });
    if (isListed) {
        return true;
    }
    return EXCLUDED_PREFIXES.some(function (prefix) {
        return clean.indexOf(prefix) === 0;
    });
}
/**
 * Checks whether an API link entry should be followed: it needs a title,
 * must not carry a numeric namespace other than 0, and must pass
 * isExcludedTitle().
 *
 * @param {Object} link Link entry with `title` and optionally `ns`.
 * @return {boolean} True when the link qualifies.
 */
function isValidLink(link) {
    var hasTitle = Boolean(link && link.title);
    if (!hasTitle) {
        return false;
    }
    // An entry without a numeric `ns` is not filtered by namespace.
    var inMainNamespace = typeof link.ns !== 'number' || link.ns === 0;
    return inMainNamespace && !isExcludedTitle(link.title);
}
/**
 * Fetches all outgoing links of one page, following redirects and paging
 * through the API's continuation until the list is complete.
 *
 * @param {Object} api Client exposing get(params) that returns a thenable.
 * @param {string} requestedTitle Title to inspect.
 * @return {Object} Thenable resolving to:
 *   - requestedTitle: the normalized requested title;
 *   - finalTitle: title of the page the API answered with;
 *   - exists: false when the API returned no page or a missing one;
 *   - links: unique sorted link titles that pass isValidLink(); empty for
 *     missing pages and for pages outside namespace 0;
 *   - redirects: { from, to } pairs reported by the API, each pair once.
 */
function getPageLinks(api, requestedTitle) {
    var links = [];
    var redirects = [];
    // The redirect list is repeated in every continuation batch; this set
    // keeps it from being appended more than once.
    var seenRedirects = Object.create(null);
    var normalizedRequested = normalizeTitle(requestedTitle);

    function buildResult(finalTitle, exists, pageLinks) {
        return {
            requestedTitle: normalizedRequested,
            finalTitle: finalTitle,
            exists: exists,
            links: pageLinks,
            redirects: redirects
        };
    }

    function request(continuation) {
        var params = {
            action: 'query',
            prop: 'links',
            titles: normalizedRequested,
            pllimit: 'max',
            redirects: 1,
            formatversion: 2
        };
        if (continuation) {
            // Send back every value of the `continue` object (plcontinue and
            // the generic `continue` token), not just plcontinue.
            Object.keys(continuation).forEach(function (key) {
                params[key] = continuation[key];
            });
        }
        return api.get(params).then(function (data) {
            if (data.query && data.query.redirects) {
                data.query.redirects.forEach(function (redirect) {
                    var from = normalizeTitle(redirect.from);
                    var to = normalizeTitle(redirect.to);
                    var id = from + '\n' + to;
                    if (seenRedirects[id]) {
                        return;
                    }
                    seenRedirects[id] = true;
                    redirects.push({
                        from: from,
                        to: to
                    });
                });
            }
            var pages = data.query && data.query.pages ? data.query.pages : [];
            var page = pages[0];
            if (!page) {
                return buildResult(normalizedRequested, false, []);
            }
            var finalTitle = normalizeTitle(page.title || normalizedRequested);
            if (page.missing !== undefined) {
                return buildResult(finalTitle, false, []);
            }
            // Pages outside namespace 0 are reported as existing but their
            // links are not collected.
            if (typeof page.ns === 'number' && page.ns !== 0) {
                return buildResult(finalTitle, true, []);
            }
            if (page.links) {
                page.links.forEach(function (link) {
                    if (isValidLink(link)) {
                        links.push(normalizeTitle(link.title));
                    }
                });
            }
            if (data.continue && data.continue.plcontinue) {
                return request(data.continue);
            }
            return buildResult(finalTitle, true, uniqueSorted(links));
        });
    }

    return request();
}
// --- Static widget layout ---------------------------------------------
// Header: widget title plus a one-line summary of the active settings.
var header = makeEl('div', 'scmc-scan-header');
var title = makeEl('div', 'scmc-scan-title', 'Сканер ссылок Marine Corps');
var subtitle = makeEl(
'div',
'scmc-scan-subtitle',
'Старт: ' + rootTitle + ' · каталог: ' + catalogTitle + ' · глубина: ' + maxDepth + ' · лимит страниц: ' + maxPages
);
header.appendChild(title);
header.appendChild(subtitle);
// Controls: the scan button and the secondary-styled clear button.
var controls = makeEl('div', 'scmc-scan-controls');
var scanBtn = makeButton('Сканировать ссылки');
var clearBtn = makeButton('Очистить', 'scmc-scan-btn scmc-scan-btn-secondary');
controls.appendChild(scanBtn);
controls.appendChild(clearBtn);
// statusBox is written by setStatus(); statsBox holds the counters line.
var statusBox = makeEl('div', 'scmc-scan-status', 'Ожидает запуска');
var statsBox = makeEl('div', 'scmc-scan-stats');
// Result panels, one per renderer: renderTree -> treeBox,
// renderKnownList -> listBox, renderCandidates -> candidatesBox,
// renderRedirects -> redirectBox.
var resultWrap = makeEl('div', 'scmc-scan-result');
var treeBox = makeEl('div', 'scmc-scan-box');
var listBox = makeEl('div', 'scmc-scan-box');
var candidatesBox = makeEl('div', 'scmc-scan-box scmc-scan-box-wide');
var redirectBox = makeEl('div', 'scmc-scan-box scmc-scan-box-wide');
resultWrap.appendChild(treeBox);
resultWrap.appendChild(listBox);
resultWrap.appendChild(candidatesBox);
resultWrap.appendChild(redirectBox);
// Replace whatever the container held before with the widget.
container.innerHTML = '';
container.appendChild(header);
container.appendChild(controls);
container.appendChild(statusBox);
container.appendChild(statsBox);
container.appendChild(resultWrap);
/**
 * Shows a status message and records its mode on the status box.
 *
 * @param {string} text Message to display.
 * @param {string} [mode] Value for the data-mode attribute; any falsy value
 *   is stored as an empty string.
 */
function setStatus(text, mode) {
    var modeValue = mode ? mode : '';
    statusBox.setAttribute('data-mode', modeValue);
    statusBox.textContent = text;
}
/**
 * Empties every result panel and the stats line, then resets the status
 * message to its idle text.
 */
function clearResults() {
    var panels = [treeBox, listBox, candidatesBox, redirectBox, statsBox];
    panels.forEach(function (panel) {
        panel.innerHTML = '';
    });
    setStatus('Ожидает запуска', '');
}
/**
 * Renders the link tree into treeBox, starting at `root`.
 *
 * Every map is keyed by titleKey(). Lookups read own properties only, so a
 * page whose key coincides with an Object.prototype member (for example
 * "Constructor") is not mistaken for an existing entry.
 *
 * @param {string} root Title of the tree root.
 * @param {Object} childrenMap key -> array of child titles.
 * @param {Object} pageInfo key -> info object; the fields read here are
 *   depth, requestedTitle, finalTitle, candidate, catalogStop, depthStop
 *   and exists.
 * @param {Object} firstParent key -> title of the page the child was first
 *   found on. A child is expanded only under that parent; elsewhere it is
 *   shown as a short reference line.
 * @param {Object} alsoLinkedFrom key -> titles of further pages linking to
 *   the page.
 */
function renderTree(root, childrenMap, pageInfo, firstParent, alsoLinkedFrom) {
    var hasOwn = Object.prototype.hasOwnProperty;

    // Own-property read: returns undefined for inherited keys.
    function lookup(map, key) {
        return hasOwn.call(map, key) ? map[key] : undefined;
    }

    treeBox.innerHTML = '';
    treeBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Дерево известных страниц'));

    // Builds the node for `title`; `path` holds the keys of its ancestors.
    function makeNode(title, path) {
        var key = titleKey(title);
        var info = lookup(pageInfo, key);
        var node = makeEl('div', 'scmc-scan-node');
        var line = makeEl('div', 'scmc-scan-node-line');
        var link = document.createElement('a');
        link.href = titleToUrl(title);
        link.textContent = title;
        line.appendChild(link);
        if (info) {
            line.appendChild(makeEl('span', 'scmc-scan-depth', 'ур. ' + info.depth));
            if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) {
                line.appendChild(makeEl('span', 'scmc-scan-redirect-mini', info.requestedTitle + ' → ' + info.finalTitle));
            }
            if (info.candidate) {
                line.appendChild(makeEl('span', 'scmc-scan-candidate', 'кандидат'));
            }
            if (info.catalogStop) {
                line.appendChild(makeEl('span', 'scmc-scan-stopped', 'остановлено каталогом'));
            }
            if (info.depthStop) {
                line.appendChild(makeEl('span', 'scmc-scan-stopped', 'предел глубины'));
            }
            if (info.exists === false) {
                line.appendChild(makeEl('span', 'scmc-scan-missing', 'нет страницы'));
            }
        }
        node.appendChild(line);

        // The page is already on the path from the root: mark the cycle and
        // do not descend again.
        if (lookup(path, key)) {
            node.appendChild(makeEl('div', 'scmc-scan-loop', '↳ уже встречалась выше'));
            return node;
        }

        var extraLinks = lookup(alsoLinkedFrom, key) || [];
        if (extraLinks.length) {
            node.appendChild(makeEl('div', 'scmc-scan-also', 'Ещё ссылки из: ' + extraLinks.join(', ')));
        }

        var nextPath = Object.assign({}, path);
        nextPath[key] = true;
        var children = lookup(childrenMap, key) || [];
        if (children.length) {
            var childrenWrap = makeEl('div', 'scmc-scan-children');
            children.forEach(function (childTitle) {
                var childKey = titleKey(childTitle);
                var knownParent = lookup(firstParent, childKey);
                if (knownParent && titleKey(knownParent) !== key) {
                    // The child belongs under another parent: show a
                    // reference line instead of expanding it again.
                    var refNode = makeEl('div', 'scmc-scan-node');
                    var refLine = makeEl('div', 'scmc-scan-node-line scmc-scan-node-ref');
                    var refLink = document.createElement('a');
                    refLink.href = titleToUrl(childTitle);
                    refLink.textContent = childTitle;
                    refLine.appendChild(refLink);
                    refLine.appendChild(makeEl('span', 'scmc-scan-ref', 'уже найдено в: ' + knownParent));
                    refNode.appendChild(refLine);
                    childrenWrap.appendChild(refNode);
                    return;
                }
                childrenWrap.appendChild(makeNode(childTitle, nextPath));
            });
            node.appendChild(childrenWrap);
        }
        return node;
    }

    treeBox.appendChild(makeNode(root, {}));
}
/**
 * Render the ordered list of pages that are not flagged as candidates.
 *
 * Entries are ordered by depth, then by final title compared with the 'ru'
 * locale. When nothing qualifies, a placeholder message is shown instead.
 *
 * @param {Object} pageInfo Map of page records; each record is read for
 *     `candidate`, `depth`, `finalTitle`, `requestedTitle`, `catalogStop`
 *     and `depthStop`.
 */
function renderKnownList(pageInfo) {
    // Shallower pages first; ties are broken by title.
    function byDepthThenTitle(left, right) {
        return left.depth !== right.depth
            ? left.depth - right.depth
            : left.finalTitle.localeCompare(right.finalTitle, 'ru');
    }

    // Append a labelled <span> marker to a list row.
    function addTag(row, className, text) {
        row.appendChild(makeEl('span', className, text));
    }

    listBox.innerHTML = '';
    listBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Известные страницы из каталога'));

    var known = [];
    var keys = Object.keys(pageInfo);
    for (var k = 0; k < keys.length; k++) {
        var record = pageInfo[keys[k]];
        if (!record.candidate) {
            known.push(record);
        }
    }
    known.sort(byDepthThenTitle);

    if (known.length === 0) {
        listBox.appendChild(makeEl('div', 'scmc-scan-empty', 'Пока пусто.'));
        return;
    }

    var listEl = makeEl('ol', 'scmc-scan-page-list');
    for (var i = 0; i < known.length; i++) {
        var info = known[i];
        var row = document.createElement('li');
        var anchor = document.createElement('a');
        anchor.href = titleToUrl(info.finalTitle);
        anchor.textContent = info.finalTitle;
        row.appendChild(anchor);

        addTag(row, 'scmc-scan-depth', 'ур. ' + info.depth);
        if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) {
            addTag(row, 'scmc-scan-redirect-mini', 'найдено как: ' + info.requestedTitle);
        }
        if (info.catalogStop) {
            addTag(row, 'scmc-scan-stopped', 'остановлено каталогом');
        }
        if (info.depthStop) {
            addTag(row, 'scmc-scan-stopped', 'предел глубины');
        }
        listEl.appendChild(row);
    }
    listBox.appendChild(listEl);
}
/**
 * Render the list of candidate pages into the candidates panel.
 *
 * Entries are ordered by depth, then by final title compared with the 'ru'
 * locale. When the map is empty, a placeholder message is shown instead.
 *
 * @param {Object} candidates Map of candidate records; each record is read
 *     for `depth`, `finalTitle`, `requestedTitle`, `from` and `exists`.
 */
function renderCandidates(candidates) {
    // Shallower pages first; ties are broken by title.
    function byDepthThenTitle(left, right) {
        return left.depth !== right.depth
            ? left.depth - right.depth
            : left.finalTitle.localeCompare(right.finalTitle, 'ru');
    }

    // Append a labelled <span> marker to a list row.
    function addTag(row, className, text) {
        row.appendChild(makeEl('span', className, text));
    }

    candidatesBox.innerHTML = '';
    candidatesBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Кандидаты: ссылки не из каталога'));

    var entries = [];
    var keys = Object.keys(candidates);
    for (var k = 0; k < keys.length; k++) {
        entries.push(candidates[keys[k]]);
    }
    entries.sort(byDepthThenTitle);

    if (entries.length === 0) {
        candidatesBox.appendChild(makeEl('div', 'scmc-scan-empty', 'Новых кандидатов не найдено.'));
        return;
    }

    var listEl = makeEl('ol', 'scmc-scan-page-list');
    for (var i = 0; i < entries.length; i++) {
        var info = entries[i];
        var row = document.createElement('li');
        var anchor = document.createElement('a');
        anchor.href = titleToUrl(info.finalTitle);
        anchor.textContent = info.finalTitle;
        row.appendChild(anchor);

        addTag(row, 'scmc-scan-depth', 'ур. ' + info.depth);
        addTag(row, 'scmc-scan-candidate', 'не сканировалась');
        if (info.from) {
            addTag(row, 'scmc-scan-ref', 'найдена из: ' + info.from);
        }
        if (info.requestedTitle && titleKey(info.requestedTitle) !== titleKey(info.finalTitle)) {
            addTag(row, 'scmc-scan-redirect-mini', info.requestedTitle + ' → ' + info.finalTitle);
        }
        if (info.exists === false) {
            addTag(row, 'scmc-scan-missing', 'нет страницы');
        }
        listEl.appendChild(row);
    }
    candidatesBox.appendChild(listEl);
}
/**
 * Fill the redirects panel with an ordered list of "source → target" links.
 *
 * Sources are sorted with the 'ru' locale. When the map is empty, a
 * placeholder message is shown instead.
 *
 * @param {Object} redirectsFound Map from redirect source title to target title.
 */
function renderRedirects(redirectsFound) {
    // Anchor pointing at a wiki page, labelled with the page title.
    function titleLink(title) {
        var anchor = document.createElement('a');
        anchor.href = titleToUrl(title);
        anchor.textContent = title;
        return anchor;
    }

    redirectBox.innerHTML = '';
    redirectBox.appendChild(makeEl('div', 'scmc-scan-box-title', 'Редиректы'));

    var sources = Object.keys(redirectsFound);
    sources.sort(function (left, right) {
        return left.localeCompare(right, 'ru');
    });

    if (sources.length === 0) {
        redirectBox.appendChild(makeEl('div', 'scmc-scan-empty', 'Редиректы не найдены.'));
        return;
    }

    var listEl = makeEl('ol', 'scmc-scan-page-list');
    for (var i = 0; i < sources.length; i++) {
        var source = sources[i];
        var row = document.createElement('li');
        row.appendChild(titleLink(source));
        row.appendChild(document.createTextNode(' → '));
        row.appendChild(titleLink(redirectsFound[source]));
        listEl.appendChild(row);
    }
    redirectBox.appendChild(listEl);
}
/**
 * Run one complete scan and render its results.
 *
 * Loads the catalog, then walks pages starting from rootTitle using a FIFO
 * queue. For each page it fetches the links, resolves them, and sorts every
 * link into either a known page (listed in the catalog; queued for scanning
 * unless it is a catalog stop page) or a candidate (not in the catalog; never
 * expanded). Scanning stops when the queue is empty or when maxPages pages
 * have been scanned. Both buttons are disabled while the scan runs and are
 * re-enabled on completion or on any failure.
 *
 * Reads from the enclosing scope: catalogTitle, rootTitle, maxPages, maxDepth,
 * scanBtn, clearBtn, statsBox and the render* helpers.
 */
function scan() {
    clearResults();
    var api = getApi();
    scanBtn.disabled = true;
    clearBtn.disabled = true;
    setStatus('Читаю каталог: ' + catalogTitle, 'scanning');

    loadCatalog(api, catalogTitle).then(function (catalog) {
        var knownPages = catalog.knownPages;
        var stopPages = catalog.stopPages;
        var redirectsFound = Object.assign({}, catalog.redirects);
        var queue = [{
            requestedTitle: rootTitle,
            depth: 0,
            parent: null
        }];
        // These maps are keyed by values derived from page titles. They are
        // created without a prototype so that a key equal to an inherited
        // Object.prototype name (e.g. "constructor") is not mistaken for an
        // existing entry.
        var scanned = Object.create(null);
        var queued = Object.create(null);
        var pageInfo = Object.create(null);
        var candidates = Object.create(null);
        var childrenMap = Object.create(null);
        var firstParent = Object.create(null);
        var alsoLinkedFrom = Object.create(null);
        var stoppedByLimit = false;
        queued[titleKey(rootTitle)] = true;

        // Merge a list of {from, to} redirect records into redirectsFound.
        function addRedirects(redirects) {
            redirects.forEach(function (redirect) {
                if (redirect.from && redirect.to) {
                    redirectsFound[redirect.from] = redirect.to;
                }
            });
        }

        // Remember that parentTitle also links to childTitle (no duplicates).
        function addAlsoLinked(childTitle, parentTitle) {
            var childKey = titleKey(childTitle);
            if (!alsoLinkedFrom[childKey]) {
                alsoLinkedFrom[childKey] = [];
            }
            if (alsoLinkedFrom[childKey].indexOf(parentTitle) === -1) {
                alsoLinkedFrom[childKey].push(parentTitle);
            }
        }

        // Create or update the record for finalTitle. The smallest depth seen
        // is kept, a differing requestedTitle replaces the stored one,
        // exists === false is sticky, and every key in options is copied
        // onto the record.
        function upsertPageInfo(finalTitle, requestedTitle, depth, exists, options) {
            var finalKey = titleKey(finalTitle);
            var current = pageInfo[finalKey];
            if (!current) {
                current = {
                    requestedTitle: requestedTitle || finalTitle,
                    finalTitle: finalTitle,
                    depth: depth,
                    exists: exists
                };
                pageInfo[finalKey] = current;
            }
            if (depth < current.depth) {
                current.depth = depth;
            }
            if (requestedTitle && titleKey(requestedTitle) !== titleKey(finalTitle)) {
                current.requestedTitle = requestedTitle;
            }
            if (exists === false) {
                current.exists = false;
            }
            if (options) {
                Object.keys(options).forEach(function (key) {
                    current[key] = options[key];
                });
            }
            return current;
        }

        // Number of pageInfo records that are not candidates. pageInfo also
        // stores candidates, so its raw size would overstate the known pages.
        function countKnownFound() {
            return Object.keys(pageInfo).filter(function (key) {
                return !pageInfo[key].candidate;
            }).length;
        }

        // Process the next queued page, then schedule itself again after a
        // short delay until the queue is empty or the page limit is reached.
        function step() {
            if (!queue.length) {
                finish();
                return;
            }
            if (Object.keys(scanned).length >= maxPages) {
                stoppedByLimit = true;
                finish();
                return;
            }
            var item = queue.shift();
            var requestedTitle = normalizeTitle(item.requestedTitle);
            var requestedKey = titleKey(requestedTitle);
            if (scanned[requestedKey]) {
                step();
                return;
            }
            scanned[requestedKey] = true;
            setStatus('Сканирую: ' + requestedTitle + ' · уровень ' + item.depth, 'scanning');

            getPageLinks(api, requestedTitle).then(function (data) {
                addRedirects(data.redirects);
                var finalTitle = normalizeTitle(data.finalTitle || requestedTitle);
                var finalKey = titleKey(finalTitle);
                var isRoot = item.depth === 0;
                var isKnown = !!knownPages[finalKey] || !!knownPages[requestedKey] || isRoot;
                var isCatalogStop = !!stopPages[finalKey] || !!stopPages[requestedKey];
                upsertPageInfo(finalTitle, requestedTitle, item.depth, data.exists, {
                    catalogStop: isCatalogStop
                });

                // Pages outside the catalog are recorded but never expanded.
                if (!isKnown) {
                    candidates[finalKey] = {
                        requestedTitle: requestedTitle,
                        finalTitle: finalTitle,
                        depth: item.depth,
                        exists: data.exists,
                        from: item.parent || '',
                        candidate: true
                    };
                    upsertPageInfo(finalTitle, requestedTitle, item.depth, data.exists, {
                        candidate: true
                    });
                    setTimeout(step, 80);
                    return;
                }
                if (isCatalogStop) {
                    childrenMap[finalKey] = [];
                    setTimeout(step, 80);
                    return;
                }
                if (item.depth >= maxDepth) {
                    pageInfo[finalKey].depthStop = true;
                    childrenMap[finalKey] = [];
                    setTimeout(step, 80);
                    return;
                }

                // Returned so that a rejection or an exception while resolving
                // links reaches the .catch below instead of leaving the
                // buttons disabled.
                return resolveTitles(api, data.links).then(function (resolvedLinks) {
                    Object.keys(resolvedLinks.redirects).forEach(function (from) {
                        redirectsFound[from] = resolvedLinks.redirects[from];
                    });
                    var children = [];
                    data.links.forEach(function (originalLinkTitle) {
                        var originalKey = titleKey(originalLinkTitle);
                        var resolved = resolvedLinks.items[originalKey];
                        var childRequested = resolved ? resolved.requestedTitle : originalLinkTitle;
                        var childFinal = resolved ? resolved.finalTitle : originalLinkTitle;
                        var childExists = resolved ? resolved.exists : null;
                        var childKey = titleKey(childFinal);
                        if (isExcludedTitle(childRequested) || isExcludedTitle(childFinal)) {
                            return;
                        }
                        var childKnown = !!knownPages[childKey] || !!knownPages[originalKey];
                        var childStop = !!stopPages[childKey] || !!stopPages[originalKey];
                        children.push(childFinal);

                        // The first page that links to a child owns it in the
                        // tree; later linking pages are only noted.
                        if (!firstParent[childKey]) {
                            firstParent[childKey] = finalTitle;
                        } else if (titleKey(firstParent[childKey]) !== finalKey) {
                            addAlsoLinked(childFinal, finalTitle);
                        }

                        if (childKnown) {
                            upsertPageInfo(childFinal, childRequested, item.depth + 1, childExists, {
                                catalogStop: childStop
                            });
                            if (!childStop && !queued[childKey]) {
                                queued[childKey] = true;
                                queue.push({
                                    requestedTitle: childFinal,
                                    depth: item.depth + 1,
                                    parent: finalTitle
                                });
                            }
                        } else {
                            candidates[childKey] = {
                                requestedTitle: childRequested,
                                finalTitle: childFinal,
                                depth: item.depth + 1,
                                exists: childExists,
                                from: finalTitle,
                                candidate: true
                            };
                            upsertPageInfo(childFinal, childRequested, item.depth + 1, childExists, {
                                candidate: true
                            });
                        }
                    });
                    childrenMap[finalKey] = uniqueSorted(children);
                    statsBox.textContent =
                        'Каталог: ' + Object.keys(knownPages).length +
                        ' · Просканировано: ' + Object.keys(scanned).length +
                        ' · В очереди: ' + queue.length +
                        ' · Известных найдено: ' + countKnownFound() +
                        ' · Кандидатов: ' + Object.keys(candidates).length +
                        ' · Редиректов: ' + Object.keys(redirectsFound).length;
                    setTimeout(step, 100);
                });
            }).catch(function (error) {
                console.error(error);
                setStatus('Ошибка при сканировании: ' + requestedTitle, 'error');
                scanBtn.disabled = false;
                clearBtn.disabled = false;
            });
        }

        // Render all panels, write the summary line and re-enable the buttons.
        function finish() {
            // Use the final title recorded for the root so the tree starts at
            // the page the root title actually resolved to.
            var rootFinal = rootTitle;
            Object.keys(pageInfo).some(function (key) {
                var info = pageInfo[key];
                if (titleKey(info.requestedTitle) === titleKey(rootTitle) || titleKey(info.finalTitle) === titleKey(rootTitle)) {
                    rootFinal = info.finalTitle;
                    return true;
                }
                return false;
            });
            renderTree(rootFinal, childrenMap, pageInfo, firstParent, alsoLinkedFrom);
            renderKnownList(pageInfo);
            renderCandidates(candidates);
            renderRedirects(redirectsFound);
            var finalText =
                'Готово. Каталог прочитан. Просканировано: ' + Object.keys(scanned).length +
                '. Кандидатов: ' + Object.keys(candidates).length +
                '. Редиректов: ' + Object.keys(redirectsFound).length + '.';
            if (stoppedByLimit) {
                finalText += ' Остановлено по лимиту страниц.';
            }
            statsBox.textContent = finalText;
            setStatus('Готово', 'done');
            scanBtn.disabled = false;
            clearBtn.disabled = false;
        }

        step();
    }).catch(function (error) {
        console.error(error);
        setStatus('Не удалось прочитать каталог: ' + catalogTitle, 'error');
        scanBtn.disabled = false;
        clearBtn.disabled = false;
    });
}
// Wire the buttons: scanBtn starts a scan, clearBtn empties the result panels and resets the status text.
scanBtn.addEventListener('click', scan);
clearBtn.addEventListener('click', clearResults);
}
// Build the scanner UI for scannerRoot (presumably the mount element; confirm against buildScanner's signature).
buildScanner(scannerRoot);
})();