- Based on data from December 15, 2018.
- CK's native speaker list http://bit.ly/nativespeakers
- sentences_detailed.tar.bz2
- The "percent native" column may indicate the trustworthiness of sentences in each language.
- Note that 0% indicates constructed languages, dead languages, and languages with no identified native speakers working on the project.
code | total sentences | owned by native speakers | percent native | language |
eng | 1149234 | 922992 | 80% | English |
rus | 658806 | 627735 | 95% | Russian |
tur | 648823 | 639967 | 99% | Turkish |
ita | 645940 | 635900 | 98% | Italian |
epo | 582536 | 0 | 0% | Esperanto |
deu | 448280 | 413840 | 92% | German |
fra | 370616 | 302103 | 82% | French |
por | 314125 | 301532 | 96% | Portuguese |
spa | 295110 | 249577 | 85% | Spanish |
hun | 232686 | 228675 | 98% | Hungarian |
heb | 189982 | 184357 | 97% | Hebrew |
jpn | 186264 | 50796 | 27% | Japanese |
ber | 162548 | 160029 | 98% | Berber |
ukr | 130249 | 119782 | 92% | Ukrainian |
pol | 96165 | 89312 | 93% | Polish |
fin | 87310 | 84362 | 97% | Finnish |
nld | 81975 | 59800 | 73% | Dutch |
mkd | 77687 | 77598 | 100% | Macedonian |
cmn | 59729 | 39483 | 66% | Chinese (Mandarin) |
kab | 56677 | 55793 | 98% | Kabyle |
mar | 47047 | 47028 | 100% | Marathi |
dan | 38170 | 36883 | 97% | Danish |
swe | 33744 | 28348 | 84% | Swedish |
ara | 30627 | 24127 | 79% | Arabic |
lat | 30506 | 0 | 0% | Latin |
srp | 30392 | 28818 | 95% | Serbian |
ces | 27867 | 19500 | 70% | Czech |
ell | 27717 | 22941 | 83% | Greek |
lit | 26217 | 19159 | 73% | Lithuanian |
toki | 25542 | 0 | 0% | Toki Pona |
bul | 24069 | 23288 | 97% | Bulgarian |
ina | 22637 | 0 | 0% | Interlingua |
pes | 18514 | 16970 | 92% | Persian |
nds | 17646 | 15984 | 91% | Low Saxon |
ron | 17203 | 14899 | 87% | Romanian |
tlh | 17054 | 0 | 0% | Klingon |
jbo | 15427 | 0 | 0% | Lojban |
tat | 13352 | 9636 | 72% | Tatar |
nob | 13333 | 8139 | 61% | Norwegian (Bokmål) |
tgl | 13257 | 8774 | 66% | Tagalog |
ind | 11996 | 11258 | 94% | Indonesian |
hin | 11696 | 3940 | 34% | Hindi |
isl | 11064 | 10706 | 97% | Icelandic |
vie | 10366 | 9186 | 89% | Vietnamese |
bel | 8220 | 7523 | 92% | Belarusian |
lfn | 7878 | 0 | 0% | Lingua Franca Nova |
uig | 7587 | 2771 | 37% | Uyghur |
bre | 7115 | 530 | 7% | Breton |
ile | 6119 | 0 | 0% | Interlingue |
eus | 6061 | 2823 | 47% | Basque |
cat | 5977 | 4149 | 69% | Catalan |
yue | 5718 | 4908 | 86% | Cantonese |
oci | 5624 | 62 | 1% | Occitan |
hrv | 5107 | 4336 | 85% | Croatian |
ido | 5067 | 0 | 0% | Ido |
aze | 4891 | 3809 | 78% | Azerbaijani |
ben | 4683 | 4312 | 92% | Bengali |
glg | 4356 | 3209 | 74% | Galician |
wuu | 4311 | 0 | 0% | Shanghainese |
mhr | 4297 | 3034 | 71% | Meadow Mari |
avk | 3896 | 32 | 1% | Kotava |
cor | 3864 | 0 | 0% | Cornish |
kor | 3764 | 1488 | 40% | Korean |
afr | 3670 | 2388 | 65% | Afrikaans |
slk | 3660 | 1855 | 51% | Slovak |
est | 2955 | 2421 | 82% | Estonian |
cbk | 2557 | 2547 | 100% | Chavacano |
kaz | 2539 | 51 | 2% | Kazakh |
vol | 2525 | 0 | 0% | Volapük |
dtp | 2477 | 2422 | 98% | Central Dusun |
ilo | 2452 | 2452 | 100% | Ilocano |
zsm | 2282 | 899 | 39% | Malay |
hye | 2221 | 180 | 8% | Armenian |
war | 2023 | 2019 | 100% | Waray |
gle | 1978 | 0 | 0% | Irish |
urd | 1930 | 1585 | 82% | Urdu |
kzj | 1888 | 0 | 0% | Coastal Kadazan |
lvs | 1854 | 1039 | 56% | Latvian |
lzh | 1820 | 0 | 0% | Literary Chinese |
chv | 1512 | 1421 | 94% | Chuvash |
khm | 1502 | 1475 | 98% | Khmer |
pam | 1482 | 1480 | 100% | Kapampangan |
ceb | 1428 | 703 | 49% | Cebuano |
orv | 1305 | 0 | 0% | Old East Slavic |
sqi | 1270 | 837 | 66% | Albanian |
nno | 1162 | 1 | 0% | Norwegian (Nynorsk) |
grn | 1116 | 0 | 0% | Guarani |
arq | 1109 | 316 | 28% | Algerian Arabic |
slv | 994 | 656 | 66% | Slovenian |
yid | 981 | 0 | 0% | Yiddish |
kha | 980 | 956 | 98% | Khasi |
gla | 946 | 0 | 0% | Scottish Gaelic |
sah | 905 | 84 | 9% | Yakut |
kat | 901 | 293 | 33% | Georgian |
csb | 879 | 0 | 0% | Kashubian |
xal | 869 | 377 | 43% | Kalmyk |
prg | 838 | 0 | 0% | Old Prussian |
pms | 822 | 87 | 11% | Piedmontese |
zza | 816 | 814 | 100% | Zaza |
nst | 793 | 362 | 46% | Naga (Tangshang) |
rom | 787 | 18 | 2% | Romani |
run | 776 | 0 | 0% | Kirundi |
hsb | 775 | 0 | 0% | Upper Sorbian |
dsb | 758 | 0 | 0% | Lower Sorbian |
mal | 736 | 716 | 97% | Malayalam |
grc | 720 | 0 | 0% | Ancient Greek |
uzb | 651 | 0 | 0% | Uzbek |
hoc | 651 | 0 | 0% | Ho |
cym | 631 | 5 | 1% | Welsh |
kur | 618 | 529 | 86% | Kurdish |
arz | 608 | 0 | 0% | Egyptian Arabic |
tha | 603 | 227 | 38% | Thai |
tpw | 559 | 0 | 0% | Old Tupi |
mon | 557 | 47 | 8% | Mongolian |
swg | 530 | 0 | 0% | Swabian |
swh | 443 | 4 | 1% | Swahili |
bos | 432 | 2 | 0% | Bosnian |
max | 427 | 406 | 95% | North Moluccan Malay |
ota | 408 | 19 | 5% | Ottoman Turkish |
que | 397 | 23 | 6% | Quechua |
mri | 367 | 304 | 83% | Maori |
tam | 322 | 252 | 78% | Tamil |
nov | 309 | 0 | 0% | Novial |
jav | 295 | 0 | 0% | Javanese |
fao | 292 | 0 | 0% | Faroese |
crh | 292 | 0 | 0% | Crimean Tatar |
tuk | 281 | 0 | 0% | Turkmen |
ckt | 260 | 0 | 0% | Chukchi |
bua | 253 | 0 | 0% | Buryat |
awa | 253 | 0 | 0% | Awadhi |
tel | 248 | 172 | 69% | Telugu |
oss | 232 | 0 | 0% | Ossetian |
nah | 209 | 0 | 0% | Nahuatl |
pan | 202 | 0 | 0% | Punjabi (Eastern) |
shy | 201 | 201 | 100% | Tachawit |
fry | 200 | 0 | 0% | Frisian |
amh | 193 | 0 | 0% | Amharic |
tzl | 185 | 0 | 0% | Talossan |
cha | 183 | 154 | 84% | Chamorro |
bak | 181 | 0 | 0% | Bashkir |
mlt | 178 | 20 | 11% | Maltese |
ast | 172 | 37 | 22% | Asturian |
guj | 168 | 156 | 93% | Gujarati |
kan | 166 | 161 | 97% | Kannada |
gsw | 159 | 0 | 0% | Swiss German |
xho | 152 | 0 | 0% | Xhosa |
sux | 152 | 0 | 0% | Sumerian |
haw | 151 | 0 | 0% | Hawaiian |
egl | 144 | 0 | 0% | Emilian |
ang | 143 | 0 | 0% | Old English |
san | 142 | 0 | 0% | Sanskrit |
bar | 138 | 2 | 1% | Bavarian |
krl | 130 | 0 | 0% | Karelian |
qya | 123 | 0 | 0% | Quenya |
min | 120 | 0 | 0% | Minangkabau |
npi | 119 | 6 | 5% | Nepali |
ltz | 117 | 0 | 0% | Luxembourgish |
rue | 116 | 116 | 100% | Rusyn |
nog | 114 | 0 | 0% | Nogai |
kir | 110 | 0 | 0% | Kyrgyz |
kum | 106 | 0 | 0% | Kumyk |
pcd | 105 | 0 | 0% | Picard |
got | 99 | 0 | 0% | Gothic |
mya | 98 | 58 | 59% | Burmese |
cho | 98 | 0 | 0% | Choctaw |
shs | 97 | 0 | 0% | Shuswap |
tpi | 93 | 0 | 0% | Tok Pisin |
zlm | 92 | 3 | 3% | Malay (Vernacular) |
ldn | 91 | 0 | 0% | Láadan |
cmo | 85 | 0 | 0% | Mnong, Central |
nav | 84 | 0 | 0% | Navajo |
lld | 84 | 0 | 0% | Ladin |
mrj | 83 | 0 | 0% | Hill Mari |
tir | 82 | 0 | 0% | Tigrinya |
mww | 82 | 0 | 0% | Hmong Daw (White) |
kal | 82 | 0 | 0% | Greenlandic |
afh | 80 | 0 | 0% | Afrihili |
smo | 75 | 0 | 0% | Samoan |
afb | 68 | 60 | 88% | Arabic (Gulf) |
udm | 67 | 0 | 0% | Udmurt |
pap | 67 | 0 | 0% | Papiamento |
tly | 66 | 0 | 0% | Talysh |
ext | 65 | 65 | 100% | Extremaduran |
tgk | 63 | 2 | 3% | Tajik |
lad | 63 | 0 | 0% | Ladino |
mgm | 60 | 0 | 0% | Mambae |
hau | 60 | 6 | 10% | Hausa |
ppl | 59 | 0 | 0% | Pipil |
pdc | 58 | 0 | 0% | Pennsylvania German |
dws | 57 | 0 | 0% | Dutton World Speedwords |
rif | 56 | 0 | 0% | Tarifit |
bvy | 56 | 0 | 0% | Baybayanon |
pag | 55 | 41 | 75% | Pangasinan |
ary | 53 | 0 | 0% | Moroccan Arabic |
wln | 52 | 0 | 0% | Walloon |
tet | 51 | 0 | 0% | Tetun |
kas | 49 | 0 | 0% | Kashmiri |
acm | 48 | 46 | 96% | Iraqi Arabic |
sgs | 47 | 46 | 98% | Samogitian |
sma | 46 | 0 | 0% | Southern Sami |
ike | 46 | 0 | 0% | Inuktitut |
krc | 45 | 0 | 0% | Karachay-Balkar |
kpv | 45 | 0 | 0% | Komi-Zyrian |
hat | 45 | 0 | 0% | Haitian Creole |
sin | 44 | 6 | 14% | Sinhala |
koi | 44 | 0 | 0% | Komi-Permyak |
ngu | 43 | 0 | 0% | Guerrero Nahuatl |
hil | 43 | 0 | 0% | Hiligaynon |
sna | 42 | 0 | 0% | Shona |
fkv | 42 | 0 | 0% | Kven Finnish |
iba | 41 | 0 | 0% | Iban |
asm | 41 | 35 | 85% | Assamese |
lin | 40 | 0 | 0% | Lingala |
bod | 40 | 0 | 0% | Tibetan |
bho | 40 | 0 | 0% | Bhojpuri |
nch | 39 | 0 | 0% | Central Huasteca Nahuatl |
mlg | 39 | 0 | 0% | Malagasy |
sjn | 38 | 0 | 0% | Sindarin |
lao | 38 | 0 | 0% | Lao |
fij | 38 | 0 | 0% | Fijian |
zul | 37 | 0 | 0% | Zulu |
cycl | 37 | 0 | 0% | CycL |
gbm | 36 | 0 | 0% | Garhwali |
yor | 35 | 0 | 0% | Yoruba |
pnb | 35 | 0 | 0% | Punjabi (Western) |
fuv | 35 | 0 | 0% | Nigerian Fulfulde |
apc | 35 | 0 | 0% | North Levantine Arabic |
hif | 34 | 0 | 0% | Fiji Hindi |
sme | 33 | 0 | 0% | Northern Sami |
pau | 33 | 0 | 0% | Palauan |
kjh | 33 | 4 | 12% | Khakas |
liv | 32 | 0 | 0% | Livonian |
niu | 31 | 25 | 81% | Niuean |
ady | 31 | 0 | 0% | Adyghe |
dng | 30 | 0 | 0% | Dungan |
tah | 29 | 28 | 97% | Tahitian |
sco | 29 | 0 | 0% | Scots |
che | 28 | 6 | 21% | Chechen |
aln | 28 | 0 | 0% | Albanian (Gheg) |
wol | 27 | 4 | 15% | Wolof |
mah | 27 | 0 | 0% | Marshallese |
ksh | 27 | 0 | 0% | Kölsch |
jam | 27 | 26 | 96% | Jamaican Patois |
akl | 27 | 27 | 100% | Aklanon |
ewe | 26 | 0 | 0% | Ewe |
abk | 26 | 0 | 0% | Abkhaz |
kin | 25 | 0 | 0% | Kinyarwanda |
kek | 24 | 0 | 0% | Kekchi (Q'eqchi') |
chr | 24 | 0 | 0% | Cherokee |
aoz | 24 | 24 | 100% | Uab Meto |
nya | 23 | 0 | 0% | Chinyanja |
myv | 23 | 0 | 0% | Erzya |
ain | 22 | 0 | 0% | Ainu |
roh | 21 | 0 | 0% | Romansh |
lkt | 21 | 0 | 0% | Lakota |
ibo | 21 | 19 | 90% | Igbo |
frm | 20 | 0 | 0% | Middle French |
ngt | 19 | 0 | 0% | Ngeq |
arg | 19 | 0 | 0% | Aragonese |
scn | 18 | 0 | 0% | Sicilian |
enm | 18 | 0 | 0% | Middle English |
ton | 17 | 0 | 0% | Tongan |
vec | 16 | 0 | 0% | Venetian |
sun | 16 | 0 | 0% | Sundanese |
pus | 16 | 0 | 0% | Pashto |
non | 15 | 0 | 0% | Old Norse |
nlv | 15 | 0 | 0% | Orizaba Nahuatl |
nan | 15 | 0 | 0% | Min Nan Chinese |
lmo | 15 | 0 | 0% | Lombard |
srd | 14 | 0 | 0% | Sardinian |
moh | 14 | 0 | 0% | Mohawk |
vro | 13 | 0 | 0% | Võro |
tvl | 13 | 11 | 85% | Tuvaluan |
gil | 13 | 0 | 0% | Gilbertese |
bam | 13 | 0 | 0% | Bambara |
som | 12 | 0 | 0% | Somali |
nau | 11 | 0 | 0% | Nauruan |
fur | 11 | 0 | 0% | Friulian |
brx | 11 | 6 | 55% | Bodo |
vep | 10 | 0 | 0% | Veps |
kam | 10 | 0 | 0% | Kamba |
mvv | 9 | 0 | 0% | Tagal Murut |
mnw | 9 | 0 | 0% | Mon |
jdt | 9 | 0 | 0% | Juhuri (Judeo-Tat) |
sag | 8 | 0 | 0% | Sango |
mic | 8 | 8 | 100% | Mi'kmaq |
mai | 8 | 0 | 0% | Maithili |
cjy | 8 | 3 | 38% | Chinese (Jin) |
quc | 7 | 0 | 0% | K'iche' |
mdf | 7 | 0 | 0% | Moksha |
mad | 7 | 0 | 0% | Madurese |
fuc | 7 | 0 | 0% | Pulaar |
tmw | 6 | 0 | 0% | Temuan |
mfe | 6 | 0 | 0% | Morisyen |
izh | 6 | 0 | 0% | Ingrian |
glv | 6 | 0 | 0% | Manx |
otk | 5 | 0 | 0% | Old Turkish |
bjn | 5 | 0 | 0% | Banjar |
oar | 4 | 0 | 0% | Old Aramaic |
hsn | 4 | 0 | 0% | Xiang Chinese |
hnj | 4 | 0 | 0% | Hmong Njua (Green) |
bcl | 4 | 2 | 50% | Bikol (Central) |
tyv | 3 | 0 | 0% | Tuvinian |
tso | 3 | 0 | 0% | Tsonga |
snd | 3 | 0 | 0% | Sindhi |
pfl | 3 | 0 | 0% | Palatine German |
osx | 3 | 0 | 0% | Old Saxon |
mwl | 3 | 0 | 0% | Mirandese |
hak | 3 | 0 | 0% | Hakka Chinese |
gan | 3 | 0 | 0% | Gan Chinese |
cos | 3 | 0 | 0% | Corsican |
ban | 3 | 0 | 0% | Balinese |
tsn | 2 | 0 | 0% | Setswana |
sot | 2 | 0 | 0% | Southern Sotho |
ori | 2 | 0 | 0% | Odia (Oriya) |
lug | 2 | 0 | 0% | Luganda |
kaa | 2 | 0 | 0% | Karakalpak |
aym | 2 | 0 | 0% | Aymara |
aii | 2 | 0 | 0% | Assyrian Neo-Aramaic |
urh | 1 | 0 | 0% | Urhobo |
umb | 1 | 0 | 0% | Umbundu |
tkl | 1 | 0 | 0% | Tokelauan |
ssw | 1 | 0 | 0% | Swazi |
oji | 1 | 0 | 0% | Ojibwe |
mnc | 1 | 0 | 0% | Manchu |
lzz | 1 | 0 | 0% | Laz |
lou | 1 | 0 | 0% | Louisiana Creole |
kxi | 1 | 0 | 0% | Keningau Murut |
gag | 1 | 0 | 0% | Gagauz |
cyo | 1 | 0 | 0% | Cuyonon |
crs | 1 | 0 | 0% | Seychellois Creole |