diff --git a/Cargo.toml b/Cargo.toml
index 8e2be053..4cfaaedb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,6 +34,8 @@ biblatex = { git = "https://github.com/typst/biblatex.git", features = [
 ciborium = { version = "0.2.1", optional = true }
 clap = { version = "4", optional = true, features = ["cargo"] }
 strum = { version = "0.26", features = ["derive"], optional = true }
+icu_collator = "2.0.0"
+icu_locale = "2.0.0"
 
 [dev-dependencies]
 heck = "0.5"
diff --git a/src/csl/sort.rs b/src/csl/sort.rs
index 2783563b..547efc81 100644
--- a/src/csl/sort.rs
+++ b/src/csl/sort.rs
@@ -5,6 +5,9 @@ use citationberg::{
     DemoteNonDroppingParticle, InheritableNameOptions, LocaleCode, LongShortForm, Sort,
     SortDirection, SortKey,
 };
+use icu_collator::options::CollatorOptions;
+use icu_collator::Collator;
+use icu_locale::Locale as IcuLocale;
 
 use crate::csl::rendering::RenderCsl;
 use crate::csl::BufWriteFormat;
@@ -12,6 +15,53 @@ use crate::csl::BufWriteFormat;
 use super::taxonomy::EntryLike;
 use super::{CitationItem, InstanceContext, StyleContext};
 
+/// Ordering that respects the collation rules of a locale.
+trait CollationOrd: Ord {
+    /// Compares `self` with `other` using the collation rules of `locale`.
+    fn collation_cmp(&self, other: &Self, locale: LocaleCode) -> Ordering;
+}
+
+impl CollationOrd for str {
+    /// Compares two strings with an ICU collator built for `locale`.
+    ///
+    /// Falls back to the plain `Ord` comparison when the locale code does not
+    /// parse or no collator can be constructed for it.
+    fn collation_cmp(&self, other: &Self, locale: LocaleCode) -> Ordering {
+        // A failure here must not abort sorting, so every error degrades to
+        // `Ord` instead of panicking.
+        // NOTE(review): a collator is built on every comparison; reusing one
+        // per sort would need a change to this signature.
+        let collator = locale.0.parse::<IcuLocale>().ok().and_then(|parsed| {
+            Collator::try_new(parsed.into(), CollatorOptions::default()).ok()
+        });
+
+        match collator {
+            Some(collator) => collator.compare(self, other),
+            None => self.cmp(other),
+        }
+    }
+}
+
+impl<T: CollationOrd> CollationOrd for Option<T> {
+    /// Compares the inner values when both are present; `None` orders before
+    /// `Some`.
+    fn collation_cmp(&self, other: &Self, locale: LocaleCode) -> Ordering {
+        match (self, other) {
+            (Some(a), Some(b)) => a.collation_cmp(b, locale),
+            (Some(_), None) => Ordering::Greater,
+            (None, Some(_)) => Ordering::Less,
+            (None, None) => Ordering::Equal,
+        }
+    }
+}
+
+impl CollationOrd for String {
+    /// Delegates to the `str` implementation.
+    fn collation_cmp(&self, other: &Self, locale: LocaleCode) -> Ordering {
+        self.as_str().collation_cmp(other.as_str(), locale)
+    }
+}
+
 impl StyleContext<'_> {
     /// 
Retrieve the ordering of two entries according to the given sort key. fn cmp_entries( @@ -32,7 +68,7 @@ impl StyleContext<'_> { .resolve_standard_variable(LongShortForm::default(), *s) .map(|s| s.to_string().to_lowercase()); - a.cmp(&b) + a.collation_cmp(&b, self.locale()) } SortKey::Variable { variable: Variable::Date(d), .. } => { let a = a.entry.resolve_date_variable(*d); @@ -133,7 +169,7 @@ impl StyleContext<'_> { let a_rendered = render(a, a_idx); let b_rendered = render(b, b_idx); - a_rendered.cmp(&b_rendered) + a_rendered.collation_cmp(&b_rendered, self.locale()) } }; diff --git a/tests/citeproc-pass.txt b/tests/citeproc-pass.txt index e8ad7e58..5bce60d5 100644 --- a/tests/citeproc-pass.txt +++ b/tests/citeproc-pass.txt @@ -400,6 +400,7 @@ sort_CitationNumberPrimaryAscendingViaMacroBibliography sort_CitationNumberPrimaryAscendingViaVariableBibliography sort_CitationSecondaryKey sort_CiteGroupDelimiter +sort_ConditionalMacroDates sort_DaleDalebout sort_DateMacroSortWithSecondFieldAlign sort_DateVariable diff --git a/tests/local/sort_Unicode.txt b/tests/local/sort_Unicode.txt new file mode 100644 index 00000000..08b8e4a5 --- /dev/null +++ b/tests/local/sort_Unicode.txt @@ -0,0 +1,120 @@ +>>==== MODE ====>> +citation +<<==== MODE ====<< + +>>==== RESULT ====>> +Ambroise-Rendu; Ère; Łakomy; Roux; Žižek +<<==== RESULT ====<< + +>>==== CITATION-ITEMS ====>> +[ + [ + { + "id": "ITEM-1" + }, + { + "id": "ITEM-2" + }, + { + "id": "ITEM-3" + }, + { + "id": "ITEM-4" + }, + { + "id": "ITEM-5" + } + ] +] +<<==== CITATION-ITEMS ====<< + +>>==== CSL ====>> + +<<==== CSL ====<< + +>>==== INPUT ====>> +[ + { + "author": [ + { + "family": "Ambroise-Rendu", + "given": "Marc" + } + ], + "id": "ITEM-1", + "type": "book" + }, + { + "author": [ + { + "family": "Roux", + "given": "Nicolas" + } + ], + "id": "ITEM-2", + "type": "book" + }, + { + "author": [ + { + "family": "Łakomy", + "given": "Henryk" + } + ], + "id": "ITEM-3", + "type": "book" + }, + { + "author": [ + { + "family": 
"Ère", + "given": "Informatique" + } + ], + "id": "ITEM-4", + "type": "book" + }, + { + "author": [ + { + "family": "Žižek", + "given": "Slavoj" + } + ], + "id": "ITEM-5", + "type": "book" + } +] +<<==== INPUT ====<< + + + +>>===== VERSION =====>> +1.0 +<<===== VERSION =====<< \ No newline at end of file