@@ -7,11 +7,16 @@ import kotlinx.coroutines.withContext
77import org.wikipedia.Constants
88import org.wikipedia.dataclient.ServiceFactory
99import org.wikipedia.dataclient.WikiSite
10+ import org.wikipedia.dataclient.mwapi.MwException
1011import org.wikipedia.dataclient.mwapi.MwQueryPage
1112import org.wikipedia.dataclient.mwapi.MwQueryResult
1213import org.wikipedia.dataclient.page.PageSummary
14+ import org.wikipedia.dataclient.wikidata.Entities
15+ import org.wikipedia.descriptions.DescriptionEditUtil
16+ import org.wikipedia.json.JsonUtil
1317import org.wikipedia.page.PageTitle
1418import org.wikipedia.suggestededits.SuggestedEditsRecentEditsViewModel
19+ import org.wikipedia.util.log.L
1520import java.time.Instant
1621import java.util.concurrent.Semaphore
1722import kotlin.math.abs
@@ -42,7 +47,7 @@ object EditingSuggestionsProvider {
4247 private var revertCandidateLastRevId = 0L
4348 private var revertCandidateLastTimeStamp = Instant .now()
4449
// Default value for the retryLimit parameter of the suggestion fetchers in this object; this change lowers it from 50 to 20.
45- private const val MAX_RETRY_LIMIT : Long = 50
50+ private const val MAX_RETRY_LIMIT : Long = 20
4651
/**
 * Picks an article on [wiki] that currently has no description and returns its page summary.
 *
 * As visible in this diff, the new implementation repeatedly fetches a batch of random pages,
 * keeps the ones whose description is null or empty, pushes the acceptable titles onto
 * articlesWithMissingDescriptionCache, and pops one title from that cache.
 *
 * @param wiki the wiki to pull random articles from.
 * @param retryLimit bound on fetch attempts; the loop condition `tries++ <= retryLimit` allows up to
 * retryLimit + 1 passes.
 * @return the page summary fetched for the chosen title.
 *
 * NOTE(review): lines between the diff hunks are not shown here (declaration of `title`, any locking and
 * cache-reuse logic) - confirm against the full file.
 */
4752 suspend fun getNextArticleWithMissingDescription (wiki : WikiSite , retryLimit : Long = MAX_RETRY_LIMIT ): PageSummary {
4853 var pageSummary: PageSummary
@@ -59,21 +64,35 @@ object EditingSuggestionsProvider {
5964 }
6065
6166 var tries = 0
62- do {
63- val listOfSuggestedEditItem = ServiceFactory .getRest(Constants .wikidataWikiSite)
64- .getArticlesWithoutDescriptions(WikiSite .normalizeLanguageCode(wiki.languageCode))
65- val mwQueryResponse = ServiceFactory .get(wiki)
66- .getDescription(listOfSuggestedEditItem.joinToString(" |" ) { it.title() })
    // Unlike the removed do/while, this loop is skipped entirely when `title` is already non-empty.
67+ while (tries++ <= retryLimit && title.isEmpty()) {
68+ // Fetch a batch of random articles, and get the ones that have no description.
69+ val resultsWithNoDescription = ServiceFactory .get(wiki).getRandomPages().query?.pages?.filter {
70+ it.description.isNullOrEmpty()
71+ }.orEmpty()
72+
6773 articlesWithMissingDescriptionCacheLang = wiki.languageCode
68- mwQueryResponse.query?.pages?.forEach {
69- if (it.description.isNullOrEmpty()) {
74+
    // When the batch is empty the forEach below is a no-op, so the isEmpty() test only serves to
    // avoid the Wikidata protection request in the else branch.
75+ if (resultsWithNoDescription.isEmpty() || DescriptionEditUtil .wikiUsesLocalDescriptions(wiki.languageCode)) {
76+ resultsWithNoDescription.forEach {
7077 articlesWithMissingDescriptionCache.addFirst(it.title)
7178 }
79+ } else {
80+ // If the wiki uses Wikidata descriptions, check protection status of the Wikidata items.
    // NOTE(review): qNums can be empty when none of the pages carries a wikiBaseItem page prop; the
    // protection request is then made with an empty title list - confirm the API tolerates that.
81+ val qNums = resultsWithNoDescription.mapNotNull { it.pageProps?.wikiBaseItem.orEmpty().ifEmpty { null } }
82+ val wdResponse = ServiceFactory .get(Constants .wikidataWikiSite).getProtection(qNums.joinToString(" |" ))
83+ val unprotectedQNums = wdResponse.query?.pages?.filter { it.protection.isEmpty() }?.map { it.title }
84+
    // Only cache articles whose Wikidata item came back with an empty protection list.
85+ resultsWithNoDescription.forEach {
86+ if (unprotectedQNums?.contains(it.pageProps?.wikiBaseItem) == true ) {
87+ articlesWithMissingDescriptionCache.addFirst(it.title)
88+ }
89+ }
7290 }
91+
7392 if (! articlesWithMissingDescriptionCache.isEmpty()) {
7493 title = articlesWithMissingDescriptionCache.removeFirst()
7594 }
76- } while (tries ++ < retryLimit && title.isEmpty())
95+ }
7796
    // NOTE(review): this call is unconditional, so if every attempt failed to produce a title the
    // summary is requested with whatever `title` still holds - confirm this is intended.
7897 pageSummary = ServiceFactory .getRest(wiki).getPageSummary(null , title)
7998 } finally {
@@ -83,8 +102,7 @@ object EditingSuggestionsProvider {
83102 return pageSummary
84103 }
85104
86- suspend fun getNextArticleWithMissingDescription (sourceWiki : WikiSite , targetLang : String ,
87- retryLimit : Long = MAX_RETRY_LIMIT ): Pair <PageSummary , PageSummary > {
/**
 * Finds an article pair suitable for translating a description from [sourceWiki] into [targetLang].
 *
 * As visible in this diff, the new loop fetches random pages from the target wiki that have no
 * description, asks Wikidata for the matching entities, keeps the entities that have a non-empty
 * source-language description and a source-wiki sitelink, and caches (source title, target title) pairs
 * in articlesWithTranslatableDescriptionCache.
 *
 * @param sourceWiki wiki whose existing description is to be translated.
 * @param targetLang language code the description should be translated into.
 * @param retryLimit bound on fetch attempts; `tries++ <= retryLimit` allows up to retryLimit + 1 passes.
 * @return a pair of page summaries; initialised to two empty PageSummary objects.
 *
 * NOTE(review): the definition of `targetWiki`, the cache-reuse logic and the end of this function fall
 * outside the visible hunks - confirm against the full file.
 */
105+ suspend fun getNextArticleWithMissingDescription (sourceWiki : WikiSite , targetLang : String , retryLimit : Long = MAX_RETRY_LIMIT ): Pair <PageSummary , PageSummary > {
88106 var pair = Pair (PageSummary (), PageSummary ())
89107 withContext(Dispatchers .IO ) {
90108 mutex.acquire()
@@ -100,41 +118,39 @@ object EditingSuggestionsProvider {
100118 titles = articlesWithTranslatableDescriptionCache.removeFirst()
101119 }
102120 var tries = 0
103- do {
104- val listOfSuggestedEditItem = ServiceFactory .getRest(Constants .wikidataWikiSite)
105- .getArticlesWithTranslatableDescriptions(WikiSite .normalizeLanguageCode(sourceWiki.languageCode),
106- WikiSite .normalizeLanguageCode(targetLang))
107- val mwQueryPages = ServiceFactory .get(targetWiki)
108- .getDescription(listOfSuggestedEditItem.joinToString(" |" ) { it.title() }).query?.pages
    // Unlike the removed do/while, no request is made when `titles` was already taken from the cache.
121+ while (tries++ <= retryLimit && titles == null ) {
122+ // Fetch a batch of random articles from the target language wiki, and get ones that have no description.
123+ val resultsWithNoDescription = ServiceFactory .get(targetWiki).getRandomPages().query?.pages?.filter {
124+ it.description.isNullOrEmpty()
125+ }.orEmpty()
109126
110127 articlesWithTranslatableDescriptionCacheFromLang = sourceWiki.languageCode
111128 articlesWithTranslatableDescriptionCacheToLang = targetLang
112129
113- listOfSuggestedEditItem.forEach { item ->
114- val page = mwQueryPages?.find { it.title == item.title() }
115- if (page != null && ! page.description.isNullOrEmpty()) {
116- return @forEach
117- }
118- val descriptions = item.entity?.getDescriptions().orEmpty()
119- val siteLinks = item.entity?.getSiteLinks().orEmpty()
120- if (descriptions.containsKey(targetLang) ||
121- ! descriptions.containsKey(sourceWiki.languageCode) ||
122- ! siteLinks.containsKey(sourceWiki.dbName()) ||
123- ! siteLinks.containsKey(targetWiki.dbName())
124- ) {
125- return @forEach
126- }
127- val sourceTitle = PageTitle (siteLinks[sourceWiki.dbName()]!! .title, sourceWiki).apply {
128- description = descriptions[sourceWiki.languageCode]?.value
130+ // Get the Wikidata entities for the articles, to see if they have descriptions in the source language.
    // NOTE(review): qNums can be empty (no page with a wikiBaseItem page prop); the entity request is
    // then made with an empty id list - confirm the API tolerates that.
131+ val qNums = resultsWithNoDescription.mapNotNull { it.pageProps?.wikiBaseItem.orEmpty().ifEmpty { null } }
132+ val wdResponse = ServiceFactory .get(Constants .wikidataWikiSite).getWikidataLabelsAndDescriptions(
133+ qNums.joinToString(" |" ),
134+ WikiSite .normalizeLanguageCode(sourceWiki.languageCode) + " |" + WikiSite .normalizeLanguageCode(targetLang),
135+ sourceWiki.dbName() + " |" + targetWiki.dbName())
136+
137+ // Get the Q numbers for which the source language description exists
    // NOTE(review): the request above asks for normalizeLanguageCode(sourceWiki.languageCode), but the
    // lookups below key on the raw sourceWiki.languageCode; if the two differ the description is never
    // found. The removed code also rejected items that already had a targetLang description on
    // Wikidata; the new filter relies only on the target page's own description being empty.
138+ val sourceLangEntities = wdResponse.entities.filter {
139+ it.value.getDescriptions()[sourceWiki.languageCode]?.value.orEmpty().isNotEmpty() &&
140+ it.value.getSiteLinks()[sourceWiki.dbName()]?.title.orEmpty().isNotEmpty() }
141+
142+ sourceLangEntities.values.forEach { entity ->
143+ val sourceTitle = PageTitle (entity.getSiteLinks()[sourceWiki.dbName()]!! .title, sourceWiki).apply {
144+ description = entity.getDescriptions()[sourceWiki.languageCode]?.value
129145 }
    // NOTE(review): the filter above only guarantees the source-wiki sitelink; the target-wiki
    // sitelink is dereferenced with !! here without a matching check (the removed code skipped
    // entries lacking it) - presumably it always exists because the page came from targetWiki; verify.
130- val targetTitle = PageTitle (siteLinks [targetWiki.dbName()]!! .title, targetWiki)
146+ val targetTitle = PageTitle (entity.getSiteLinks() [targetWiki.dbName()]!! .title, targetWiki)
131147 articlesWithTranslatableDescriptionCache.addFirst(sourceTitle to targetTitle)
132148 }
133149
134150 if (! articlesWithTranslatableDescriptionCache.isEmpty()) {
135151 titles = articlesWithTranslatableDescriptionCache.removeFirst()
136152 }
137- } while (tries ++ < retryLimit && titles == null )
153+ }
138154
139155 titles?.let {
140156 val sourcePageSummary = async {
@@ -170,16 +186,33 @@ object EditingSuggestionsProvider {
170186 }
171187 imagesWithMissingCaptionsCacheLang = lang
// Retry loop that looks for a Commons image lacking a caption in `lang`.
// NOTE(review): the enclosing function's signature and the declaration of `title` are outside the
// visible hunk - confirm against the full file.
172188 var tries = 0
173- do {
174- val listOfSuggestedEditItem = ServiceFactory .getRest(Constants .commonsWikiSite)
175- .getImagesWithoutCaptions(WikiSite .normalizeLanguageCode(lang))
176- listOfSuggestedEditItem.forEach {
177- imagesWithMissingCaptionsCache.addFirst(it.title())
178- }
179- if (! imagesWithMissingCaptionsCache.isEmpty()) {
180- title = imagesWithMissingCaptionsCache.removeFirst()
    // Up to retryLimit + 1 passes; stops as soon as a title has been picked.
189+ while (tries++ <= retryLimit && title.isEmpty()) {
190+ try {
    // Keep only random Commons pages whose image info has a matching mime prefix and that have
    // an empty protection list.
191+ val candidates = ServiceFactory .get(Constants .commonsWikiSite).getRandomImages()
192+ .query?.pages?.filter {
193+ it.imageInfo()?.mime.orEmpty().startsWith(" image" ) &&
194+ it.protection.isEmpty()
195+ }.orEmpty()
196+
197+ candidates.forEach { candidate ->
    // The entity is read from the "mediainfo" slot of the first revision; candidates without
    // that content are skipped.
198+ val entityJson = candidate.revisions.firstOrNull()?.getContentFromSlot(" mediainfo" )
199+ if (entityJson.isNullOrEmpty()) {
200+ return @forEach
201+ }
202+ JsonUtil .decodeFromString<Entities .Entity >(entityJson)?.let { entity ->
    // Cache the image only when it has no label for the normalized language code.
203+ if (entity.getLabels()[WikiSite .normalizeLanguageCode(lang)]?.value.isNullOrEmpty()) {
204+ imagesWithMissingCaptionsCache.addFirst(candidate.title)
205+ }
206+ }
207+ }
208+
209+ if (! imagesWithMissingCaptionsCache.isEmpty()) {
210+ title = imagesWithMissingCaptionsCache.removeFirst()
211+ }
    // Only MwException is logged and retried; any other exception type still propagates out of the loop.
212+ } catch (e: MwException ) {
213+ L .w(e)
181214 }
182- } while (tries ++ < retryLimit && title.isEmpty())
215+ }
183216 } finally {
184217 mutex.release()
185218 }
@@ -205,21 +238,35 @@ object EditingSuggestionsProvider {
205238 imagesWithTranslatableCaptionCacheFromLang = sourceLang
206239 imagesWithTranslatableCaptionCacheToLang = targetLang
// Retry loop that looks for a Commons image with a caption in sourceLang but none in targetLang.
// NOTE(review): the enclosing function's signature and the declaration of `pair` are outside the
// visible hunk - confirm against the full file.
207240 var tries = 0
208- do {
209- val listOfSuggestedEditItem = ServiceFactory .getRest(Constants .commonsWikiSite).getImagesWithTranslatableCaptions(
210- WikiSite .normalizeLanguageCode(sourceLang),
211- WikiSite .normalizeLanguageCode(targetLang)
212- )
213- listOfSuggestedEditItem.forEach {
214- if (! it.captions.containsKey(sourceLang) || it.captions.containsKey(targetLang)) {
215- return @forEach
    // Up to retryLimit + 1 passes; continues while either half of `pair` is still empty.
241+ while (tries++ <= retryLimit && (pair.first.isEmpty() || pair.second.isEmpty())) {
242+ try {
    // Same candidate filter as the missing-caption loop, but requesting 50 random images.
243+ val candidates = ServiceFactory .get(Constants .commonsWikiSite).getRandomImages(50 )
244+ .query?.pages?.filter {
245+ it.imageInfo()?.mime.orEmpty().startsWith(" image" ) &&
246+ it.protection.isEmpty()
247+ }.orEmpty()
248+
249+ candidates.forEach { candidate ->
250+ val entityJson = candidate.revisions.firstOrNull()?.getContentFromSlot(" mediainfo" )
251+ if (entityJson.isNullOrEmpty()) {
252+ return @forEach
253+ }
254+ JsonUtil .decodeFromString<Entities .Entity >(entityJson)?.let { entity ->
255+ val labels = entity.getLabels()
    // NOTE(review): the guard looks labels up by normalizeLanguageCode(sourceLang), but the
    // cached caption is read with the raw sourceLang key. If the two codes differ, an empty
    // caption is cached, pair.first stays empty and the loop keeps retrying - the lookup
    // key should probably be the normalized code in both places.
256+ if (labels[WikiSite .normalizeLanguageCode(sourceLang)]?.value.orEmpty().isNotEmpty() &&
257+ labels[WikiSite .normalizeLanguageCode(targetLang)]?.value.isNullOrEmpty()) {
258+ imagesWithTranslatableCaptionCache.addFirst(labels[sourceLang]?.value.orEmpty() to candidate.title)
259+ }
260+ }
216261 }
217- imagesWithTranslatableCaptionCache.addFirst((it.captions[sourceLang] ? : error(" " )) to it.title())
    // Only MwException is logged and retried; any other exception type still propagates out of the loop.
262+ } catch (e: MwException ) {
263+ L .w(e)
218264 }
265+
    // Runs outside the try, so a cached pair from an earlier pass is still consumed after a failed request.
219266 if (! imagesWithTranslatableCaptionCache.isEmpty()) {
220267 pair = imagesWithTranslatableCaptionCache.removeFirst()
221268 }
222- } while (tries ++ < retryLimit && (pair.first.isEmpty() || pair.second.isEmpty()))
269+ }
223270 } finally {
224271 mutex.release()
225272 }
0 commit comments