-- မေႃႇၵျူး:place
-- Appearance
-- Documentation for this module may be created at မေႃႇၵျူး:place/doc
-- Table that collects this module's exported functions (presumably returned at the end of the file, which is not
-- visible in this section -- confirm).
local export = {}
local force_cat = false -- set to true for testing

-- Modules that are loaded immediately when this module is loaded.
local m_placetypes = require("Module:place/placetypes")
local m_links = require("Module:links")
local m_strutils = require("Module:string utilities")
local m_table = require("Module:table")

-- Names of other modules, stored as strings rather than loaded here. Presumably the remaining ones are passed to
-- require() on demand elsewhere in the file -- confirm. Note that `en_utilities_module` and `languages_module` are
-- already loaded below at module load time (for `pluralize` and `enlang`).
local debug_track_module = "Module:debug/track"
local en_utilities_module = "Module:en-utilities"
local languages_module = "Module:languages"
local parse_interface_module = "Module:parse interface"
local parse_utilities_module = "Module:parse utilities"
local utilities_module = "Module:utilities"

-- Result of looking up the language code "en" in the languages module (presumably the English language object).
local enlang = require(languages_module).getByCode("en")

-- Local aliases for functions from [[Module:string utilities]], the Scribunto `mw` library, the standard `table`
-- library, [[Module:en-utilities]] and [[Module:table]].
local rmatch = m_strutils.match
local rfind = m_strutils.find
local ulen = m_strutils.len
local split = m_strutils.split
local dump = mw.dumpObject
local insert = table.insert
local concat = table.concat
local pluralize = require(en_utilities_module).pluralize
local extend = m_table.extend
local unpack = unpack or table.unpack -- Lua 5.2 compatibility

-- Local aliases for error helpers and placetype data defined in [[Module:place/placetypes]].
local internal_error = m_placetypes.internal_error
local process_error = m_placetypes.process_error
local placetype_data = m_placetypes.placetype_data
--[==[ intro:
===Introduction===
This module implements {{tl|place}}, which is a template for standardizing the description and categorization of
toponyms (terms that refer to locations such as cities, countries, rivers, etc.). The following modules support this
template:
* [[Module:place]]: The main module.
* [[Module:place/placetypes]]: A module containing data on placetypes, as well as utilities for working with placetypes;
category generation handlers for adding categories based on placetypes; and display handlers for displaying holonyms
(i.e. containing locations) of a specific type. FIXME: Maybe split out the code from the data.
* [[Module:place/locations]]: A module containing data on known locations, as well as utilities for working with
such locations. FIXME: Maybe split out the code from the data.
* [[Module:category tree/topic/Places]]: A category tree module for generating the descriptions of all
categories generated by {{tl|place}}.
* [[Module:place doc]]: A module that generates documentation tables describing known placetypes and locations.
===Basic terminology===
The basic terminology used in this and associated {{tl|place}} modules is:
* A ''location'' (or equivalently, a ''place'') is any geographic feature (either natural or geopolitical), either on
the surface of the Earth or elsewhere. Examples of types of natural places are rivers, mountains, seas and moons;
examples of types of geopolitical places are cities, countries, neighborhoods and roads. A ''known location'' is
specifically a location whose properties are specified in the {{tl|place}} modules; more on them below.
* Specific places are identified by names, referred to as ''toponyms'' or ''placenames''. A given place will often have
multiple names, and a given toponym may be ambiguous, referring to multiple possible locations. Specifically:
** There may be names including different amounts of disambiguating information (`Tucson` vs. `Tucson,
Arizona` vs. `Tucson, Arizona, USA` or `New York` vs. `New York City` vs. `New York, New York`); abbreviations (`NYC`
for `New York City`, `USA` for `United States of America`); ''official'' vs. ''short'' names (e.g. `Union of Soviet
Socialist Republics` vs. `Soviet Union`); spelling variations (`Cracow` vs. `Krakow` vs. `Kraków`); current vs.
former names (`Saint Petersburg` vs. `Leningrad` vs. `Petrograd`); [[exonym]]s vs. [[endonym]]s (e.g. `Tavastia
Proper` vs. `Kanta-Häme`, both referring to the same administrative region in Finland); alternative names not due to
any of the above reasons (`Bashkiria` vs. `Bashkortostan`); etc. In addition, each language that has an opportunity
to refer to the place will have its own name, with the same sorts of variations as exist in English.
** Examples of ambiguous toponyms are `New York` (either a city or a state); `Georgia` (either a state of the US or an
independent country in the Caucasus Mountains); `Paris` (either the capital of France or various small cities and
towns in the US); `Mexico` (either a country, a state of that country, or the capital city of that country); and
`San Antonio` (besides being a major city in Texas, it is the name of dozens of settlements of all sorts throughout
the US and Latin America, and at least 181 distinct [[barangay]]s in the Philippines).
* A ''placetype'' is the (or a) type that a location belongs to (e.g. `city`, `state`, `river`, `administrative region`,
`[[regional county municipality]]`, etc.).
** It is common for locations to be described using multiple placetypes, and even sometimes known locations have
multiple placetypes that they may be identified by (e.g. American Samoa can be identified either as an `unincorporated
territory`, an `overseas territory` or just a `territory`). Both the {{tl|place}} template and the known location
data allow a given location to be identified by multiple placetypes. When in doubt as to the correct placetype or
placetypes for a given location, generally follow how Wikipedia describes the place.
** Some placetypes themselves are ambiguous; e.g. an ''area'' can variously refer to a top-level administrative division
(specifically of Kuwait); a geographic region, generally without unambiguously defined borders; or a section of a
city, similar to a neighborhood. The term ''district'' is similarly ambiguous. A ''[[prefecture]]'' in the context of
Japan is similar to a province, but a prefecture in France is the capital of a ''[[department]]'' (which is similar
to a county). Some of this ambiguity is currently handled automatically; e.g. the ambiguity of areas and districts is
handled by looking at the ''holonyms'', or containing locations, specified for a given place. But sometimes it is
necessary to use a qualifier before the placetype to disambiguate; for example to refer to a French prefecture, use
the placetype `French prefecture` instead of just `prefecture`. (FIXME: Handle this automatically.)
* A ''holonym'', in the context of a description of a place, is a placename that refers to a larger-sized entity that
contains the location being described. For example, `Arizona` and `United States` are holonyms of `Tucson`, and
`United States` is a holonym of `Arizona`.
* A ''place invocation'' consists of the invocation of {{tl|place}}, including all its parameters. Place invocations
may contain one or more ''place descriptions'', each of which provides a description of the location, including its
placetype or types, any holonyms, and any additional raw text needed to properly explain the place in context. Place
invocations may also contain named parameters specifying zero or more English ''glosses'' or translations (for
foreign-language toponyms) and any attached ''extra information'' such as the capital, largest city, official name,
modern name or full name. Multiple place descriptions in a single invocation are separated by a numbered parameter
starting with a semicolon, and are used when it is necessary to provide two or more definitions of a single location
for proper categorization. For example, [[Vatican City]] is defined both as a city-state in Southern Europe and as an
enclave within the city of Rome, as follows:
: {{tl|place|en|city-state|r/Southern Europe|;,|an <<enclave>> within the city of <<city/Rome>>, <<c/Italy>>|cat=Cities in Italy|official=Vatican City State}}.
Similar things need to be done for places like [[Crimea]] that are claimed by two different countries with different
definitions and administrative structures.
** There are two types of place descriptions, ''new-style'' and ''old-style''. (The use of the terms "new" and "old"
indicates chronological precedence in the development of {{tl|place}}, but is not meant to pass any value judgments
on the two types, and does not indicate any intent to deprecate old-style descriptions. Both types of descriptions
are useful; for example, old-style descriptions are generally more succinct but less flexible.) The above invocation
shows both types: an old-style description followed by a new-style description. Old-style descriptions use multiple
numbered parameters, where the first parameter (after the language code) specifies the placetype or types, and
following parameters specify either holonyms (which are always of the form ` ``placetype``/``placename`` `) or raw
text (which is identifiable by not having a slash in it). New-style descriptions use a single parameter, where both
placetypes and holonyms are surrounded by double angle brackets, and all remaining text is raw (displayed as-is). In
both types of descriptions, holonyms include a slash in them to separate the placetype (which is mandatory and often
abbreviated) from the placename.
** In the context of a place description, there are two types of placetypes. The ''entry placetypes'' are the placetypes
of the place being described, while the ''holonym placetypes'' are the placetypes of the holonyms that the place
being described is located within. Currently, a given place can have multiple placetypes specified (e.g. [[Normandy]]
is specified using the ''compound placetype'' `administrative region/former province/and/medieval kingdom`) while a
given holonym can have only one placetype associated with it. Holonym placetypes are frequently abbreviated (e.g.
`r` for `region`, `s` for `state`, `co` for `county`, etc.), while stylistically it is preferred to spell out the
entry placetype (except for some long placetypes with well-known abbreviations, such as `CDP` or `cdp` for
`[[census-designated place]]`).
** All holonyms in place descriptions are automatically linked as if surrounded by {{tl|l|en|...}}; i.e. if double
brackets do not occur in the holonym, the entire holonym will be linked to the corresponding Wiktionary article. For
this reason, the holonym should generally be in the same format as the canonical Wiktionary article describing the
location (see below).
* A ''known location'' is a location whose properties are specifically defined in the {{tl|place}} modules. Generally
each such location has an associated category, and known locations exist in a containment hierarchy, where the
immediately containing known location is known as the ''container'' of the location and the chain of successive
containing locations is known as the ''container trail''. Generally the location's container corresponds to the first
parent of its category. Note that some known locations belong to more than one immediate container; for example,
Russia belongs to both Europe and Asia.
===More about placetypes===
# The following general categories of placetypes exist:
## ''Natural features'' such as lakes, mountains, mountain ranges, islands, archipelagoes, moons, stars, asteroids, etc.
## ''Continents'', ''supercontinents'' (groupings of continents where it makes sense, such as `America` and `Eurasia`)
and ''continent-level regions'' (grouping of countries in a given continent, such as `Central America` and
`Polynesia`).
## ''Political entities'', which are generally classified as either ''polities'' (top-level entities such as countries),
''subpolities'' or ''political divisions'' (non-sovereign divisions, often specifically ''administrative divisions'',
of a polity, where an administrative division has a governmental or statistical function and almost always has
unambiguously defined boundaries), or ''settlements'' (e.g. cities; towns; villages; and divisions of a city such as
neighborhoods, wards, [[barrio]]s and [[barangay]]s, which may or may not be formal administrative divisions and
may or may not have unambiguous boundaries).
## ''Geographic regions'', which refer to recognized areas of the Earth (either with a natural geographic, political or
cultural significance, often of a historical nature). Such regions can be of greatly varying size, may exist either
within a single country or spanning multiple countries or (more often) parts of multiple countries, and may not have
well-defined boundaries. They should be distinguished from ''administrative regions'', which exist within a single
country and have well-defined boundaries and a political or administrative function. Geographic regions are
categorized using the generic term ''geographic and cultural areas'' to emphasize that (a) they have no
administrative significance; (b) they may vary greatly in size; and (c) their cohesion is due either to natural
geographic boundaries, such as rivers or mountain ranges, or to sharing some cultural characteristics.
## ''Man-made structures'' below the level of a settlement or neighborhood, such as airports, roads, individual
buildings, and the like. (Note that such structures, even if named, often do not meet the [[WT:CFI]] criteria; this
is particularly the case for roads.)
# Placetypes support aliases, and the mapping to canonical form happens early on in the processing. For example, `state`
can be abbreviated as `s`; `administrative region` as `adr`; `regional county municipality` as `rcomun`; etc. Some
placetype aliases handle alternative spellings rather than abbreviations. For example, `departmental capital` maps to
`department capital`, and `home-rule city` maps to `home rule city`. Placetype abbreviations are particularly useful
in holonym specs, because every holonym must be accompanied by its placetype, for disambiguation purposes.
# A ''placetype qualifier'' is an adjective prepended to the placetype to give additional information about the
place being described. For example, a given place may be described as a `small city`; logically this is still a city,
but the qualifier `small` gives additional information about the place. Multiple qualifiers can be stacked, e.g.
`small affluent beachfront unincorporated community`, where `unincorporated community` is a recognized placetype and
`small`, `affluent` and `beachfront` are qualifiers. (As shown here, it may not always be obvious where the qualifiers
end and the placetype begins.) For the most part, placetype qualifiers do not affect categorization; a `small city`
is still a city and an `affluent beachfront unincorporated community` is still an unincorporated community, and both
should still be categorized as such. But some qualifiers do change the categorization. In particular, a `former
province` is no longer a province and should not be categorized in e.g. [[:Category:Provinces of Italy]], but instead
in a different set of categories, e.g. [[:Category:Historical political subdivisions]]. There are several terms
treated as equivalent for this purpose: `abandoned`, `ancient`, `extinct`, `historic(al)`, `medi(a)eval` and
`traditional`. Another set of qualifiers that change categorization are `fictional` and `mythological`, which cause
any term using the qualifier to be categorized respectively into [[:Category:Fictional locations]] and
[[:Category:Mythological locations]].
===More about toponyms===
# Toponyms may be:
## ''simple'' (not including any containing location in its name, such as `Tucson`) or ''multipart'' (including one or
more containing locations, such as `Tucson, Arizona` or `Tucson, USA` or even `Tucson, Arizona, USA`);
## ''bare'' (not including the word `the` if the location normally requires this article when following a preposition,
such as `United States`, `Gambia` or `Community of Madrid`) or ''prefixed'' (including the word `the` as needed, such
as `the United States`, `the Gambia` or `the Community of Madrid`);
## ''elliptical'' (just the placename without any disambiguating placetype, such as `Durham`, `New York` or `Mexico`) or
''full'' (containing a disambiguating placetype or similar identifier if one is commonly included, such as
the city of `Durham` (in England) vs. its containing county `County Durham`; the US city `New York City` vs. its
containing state `New York`; or the three-way distinction between `Mexico` (the country), `Mexico City` (the capital
of this country) and `(the) State of Mexico` (one of the states of the country Mexico, mostly surrounding but not
including Mexico City)).
# The ''canonical Wiktionary article'' is the main article on Wiktionary where a location is described. Canonical
articles, per the above terminology, are generally ''simple'' and ''bare'', but may be either ''full'' or
''elliptical''. The fact that a given article is canonical is often identifiable by the fact that translations are
housed there and not somewhere else. For example, most counties of the US and Canada include the word `County` in their
canonical article name, but most counties elsewhere do not. `Washington, D.C.` is one of the few cases where a
non-simple toponym is used as the canonical article; this is based on common usage, especially by residents of the
city in question (who commonly refer to it as "D.C." but rarely just as "Washington").
===More about known locations===
# The following types of known locations are defined in this module:
## Continents, supercontinents and continent-level regions, into which countries are grouped. Specifically:
### At the top level below `Earth` are the supercontinents `America` and `Eurasia` and the continents `Africa`,
`Oceania` and `Antarctica`.
### `America` is further broken down into the continents `North America` (in turn containing the continental regions
`Central America` and `Caribbean`, with the United States, Canada and Mexico directly under North America) and
`South America`.
### `Eurasia` is further broken down into the continents `Europe` and `Asia`.
### `Oceania` is further broken down into the continental regions `Melanesia`, `Micronesia` and `Polynesia`, with
`Australia` directly under `Oceania`.
### Under the above-specified divisions are countries. Some countries are placed in more than one continent or
continent-level region, either because they actually span two continents (e.g. Russia, Turkey, Kazakhstan, Egypt) or
because they are politically considered to belong to a continent different from the one they are geographically in
(Cyprus, Georgia, Armenia, etc.).
## Political entities, including:
### Top-level political entities, which include:
#### Countries, with a fairly liberal definition, notably including all UN-recognized countries plus some others that
are commonly considered countries, even if not all other countries recognize them as such or consider them
completely independent (notably, Kosovo, Palestine, Taiwan, Western Sahara, Niue and the Cook Islands).
#### Pseudo-countries, which include areas calling themselves countries that are de-facto not under the control of the
country that they are internationally considered part of (e.g. Abkhazia, South Ossetia, Transnistria);
dependent/external/etc. territories of countries (e.g. American Samoa [US], Bermuda [UK], Christmas Island
[Australia], Easter Island [Chile]); constituent countries, autonomous territories and the like (Aruba, Curaçao and
Sint Maarten of the Netherlands; Greenland and the Faroe Islands of Denmark; etc.; but notably not including
England, Scotland, Northern Ireland and Wales, which are treated as regular countries); and a grab bag of other
entities that have a semi-independent existence, such as Hong Kong, Macau, Guadeloupe, Martinique and the like.
Currently, the actual distinction in treatment between "countries" and "country-like entities" is minimal, but in
the future we might restrict the sorts of subcategories of country-like entities more than regular countries.
#### Former countries, e.g. the Soviet Union, Yugoslavia, West Germany and the Roman Empire. These are much more limited
in the sorts of subcategories allowed, because generally locations, especially cities, should be described from the
perspective of which political entity they are currently located in (e.g. "an ancient Roman town in modern Syria")
and categorized as such.
### Subpolities. Generally we only list top-level administrative divisions of countries (and only fairly major countries
are usually included), but sometimes we list second-level administrative divisions, as in the case of the
United Kingdom (where the top-level administrative divisions of the four constituent countries are listed) and China
(where major prefecture-level cities are listed, and are considered administrative divisions rather than cities).
### Cities. Only major cities get categories, with the definition of "major" varying by country but often including
those where the city population itself (sometimes the metro area) is >= 1,000,000 people.
# A distinction should be made in the {{tl|place}} modules between ''keys'' and ''placenames''. Placenames are as the
location appears in a holonym, and are generally in the same format as the canonical Wiktionary article describing the
location so that when formatted as a link, the link goes to the right article; i.e. they are simple and bare, and may
be full or elliptical according to Wiktionary conventions. The ''canonical key'' of a location is how the location's
category is named, and always uniquely identifies the location from among the known locations in this module (but
not necessarily among all possible locations). In particular, subpolities usually have multipart keys that include the
containing location, such as `Anhui, China` (not just `Anhui`); `Arizona, USA` (not just `Arizona`, and also not
`Arizona, United States`); and `Herefordshire, England` (not just `Herefordshire`, and also in this case not
`Herefordshire, UK` or `Herefordshire, England, UK` or any other possible variation). Cities are normally simple, but
some cities are multipart for disambiguation purposes (e.g. `Newcastle, New South Wales` for the city in Australia vs.
`Newcastle upon Tyne` for the identically-named city in England). Canonical keys may have ''key aliases'', other
ways of referring to the location that are not necessarily unique (e.g. `Newcastle` is a key alias for both of the
above-mentioned cities), and city keys with diacritics generally have diacriticless aliases, such as canonical key
`Düsseldorf` vs. key alias `Dusseldorf`, or canonical key `Łódź` vs. key alias `Lodz`.
# Known locations are gathered into ''groups'' with similar properties, such as all the states of the United States;
all the (ceremonial) counties of England (see below); and all the "sufficiently major" prefecture-level cities in
China (where a prefecture-level city is a prefecture surrounding a major city with a unified government and is more
like a prefecture, i.e. a major administrative division just underneath a province, than like a city, and where
"sufficiently major" is defined according to the population of either the total prefecture or the urban area of the
city). Note that there are multiple types of counties in England, with overlapping but non-identical names and
boundaries; there are, in particular, ''ceremonial counties'', ''local government counties'' and ''historic
counties''; ''ceremonial counties'' have only ceremonial administrative functionality but unlike local government
counties (a) don't frequently change their boundaries or nature, (b) correspond more closely to historic county
boundaries and names, and (c) are what Englanders usually identify themselves with, and so they are used as top-level
divisions rather than local government counties.
# Some known locations have ''aliases'' defined, which are of two types. ''Display aliases'' map holonyms to their
canonical form near the beginning of processing (in particular before the displayed output is formatted). For example,
`US`, `U.S.`, `USA`, `U.S.A.` and `United States of America` are all canonicalized to `United States` (if identified
as a country), and display as `United States`. Similarly, the foreign forms `Occitanie` (as a region or administrative
region) and `Noord-Brabant` (as a province) are mapped to `Occitania` and `North Brabant` for display purposes. There
are also ''category aliases'', so that if e.g. `Republic of Macedonia` is encountered, it will display as such but
categorize as `North Macedonia`. (This is because, among other reasons, `Republic of Macedonia` is normally preceded
by `"the"` while `North Macedonia` is not, so a call {{tl|place|en|a <<city>> in the <<c/Republic of Macedonia>>}}
would look wrong if `Republic of Macedonia` were converted to `North Macedonia` during display, as the result would be
`a city in the North Macedonia`. There are also frequently political connotations to different category aliases, e.g.
`Burma` vs. `Myanmar`.) All of these aliases are sensitive to the placetype specified. For example, `Mexico` as a
state is categorized under `State of Mexico, Mexico` but `Mexico` the country is categorized as just `Mexico`.
===Categories===
There are two main types of categories:
# Categories for known locations, divided into:
## Top-level polity categories (e.g. [[:Category:United States]], [[:Category:Taiwan]], [[:Category:South Ossetia]],
[[:Category:Bermuda]], [[:Category:Soviet Union]], [[:Category:West Germany]]).
## Subpolity categories ([[:Category:Arizona, USA]], [[:Category:Hunan]], [[:Category:Kagoshima Prefecture]],
[[:Category:Cluj County, Romania]]). For historical reasons, different formats are used for the subpolities of
different polities. Increasingly, we are moving towards always including the polity name in the subpolity category,
but whether the subpolity type is included and where it is included (cf. [[:Category:Cluj County, Romania]] vs.
[[:Category:County Cork, Ireland]]) is still inconsistent and will probably remain that way, based on how the
subpolity is normally referred to.
## City categories ([[:Category:Tokyo]], [[:Category:New York City]], [[:Category:Jaipur]]). Normally these do not
include the containing subpolity, but may do so in order to disambiguate.
# Categories for placetypes, divided into:
## "Immediate" political and non-political division categories ([[:Category:States of the United States]],
[[:Category:Municipalities of Tocantins, Brazil]], [[:Category:Ghost towns in Arizona, USA]]). These are name
categories, whose purpose is to contain locations of the specified type. "Immediate" here refers to the fact that
the location in the category name is the immediately-containing polity. Usually these categories use the preposition
"of", but sometimes "in". (Specifically, "of" typically implies that the placetype in question has an official or
semi-official status, whereas "in" implies there is no such official status, but common usage may override this.)
The form of the toponym appearing in these categories is always the same as that of the corresponding toponym
category except that the word "the" may appear (e.g. [[:Category:States of the United States]]), whereas it doesn't
appear in the toponym category itself ([[:Category:United States]], no "the").
## "Skip-polity" categories for second-level political and non-political divisions of a country or other top-level
polity (e.g. [[:Category:Counties of the United States]], [[:Category:Municipalities of Brazil]] and
[[:Category:Subprefectures of Japan]]). These have several purposes:
* They group the immediate division categories mentioned previously.
* They categorize "straggler" toponyms that (often improperly) fail to mention the subpolity they belong to, but
only the top-level polity.
* If categories do not exist for the first-level divisions of a country (and sometimes even when they do), they group
all toponyms of the specified type for the specified country. For example, Lithuania is divided into first-level
counties and second-level municipalities, but since we don't currently have categories for Lithuanian counties,
all municipalities go under [[:Category:Municipalities of Lithuania]] rather than under a category for a specific
county. In addition, even though we do have categories for Japanese prefectures (a first-level division), all
subprefectures (a second-level division) go under [[:Category:Subprefectures of Japan]] because there aren't very
many of them (see below).
## "Generic placetype" categories, both of the immediate and skip-polity type (immediate
[[:Category:Cities in California, USA]] and [[:Category:Neighborhoods of the Bronx]]; skip-polity
[[:Category:Villages in Ivory Coast]], [[:Category:Geographic and cultural areas of England]],
[[:Category:Rivers in Egypt]] and [[:Category:Places in the Philippines]]). As mentioned above, "generic" placetypes
occur in every polity (although the set of generic placetypes allowed for cities is a subset of those allowed for
top-level polities and subpolities). Usually these categories use the preposition "in", but sometimes "of". As above,
skip-polity categories group immediate categories, and in addition there are various reasons a toponym entry is
categorized into a skip-polity category. (For example, as a general rule, geographic and cultural areas only
categorize at the country level, not the subpolity level, both because there often aren't very many in a given
country and because they often span multiple subpolities.)
The parent categories of a given category depend on its type. Generally, location categories have placetype categories
as their first parent, and vice-versa. Specifically:
# Top-level country categories have as their parent e.g. [[:Category:Countries in Europe]],
[[:Category:Countries in Central America]] or [[:Category:Countries in Polynesia]], using the most specific
continental-level region the country is contained in.
# Pseudo-countries are under [[:Category:Country-like entities]] as a neutral designation. There aren't enough of them
to subcategorize under continent-level regions.
# Former countries are under [[:Category:Former countries and country-like entities]].
# Subpolity categories are usually under a placetype category whose placetype is the canonical (first-listed) placetype
of the subpolity and whose toponym is the immediately containing polity, but there are exceptions. Specifically,
sometimes if a polity has multiple types of subpolities, they are combined (e.g. [[:Category:States and territories of
Australia]], [[:Category:Federal subjects of Russia]]). In addition, sometimes a less specific but more identifiable
placetype is used instead of the canonical one (e.g. [[:Category:Regions of France]] when the canonical placetype is
"administrative region"). The same rules and exceptions generally apply when categorizing subpolities themselves; e.g.
both the Australian state of Queensland and territory of Northern Territory go under
[[:Category:en:States and territories of Australia]] rather than separately under [[:Category:en:States of Australia]]
and [[:Category:en:Territories of Australia]]. In addition, sometimes subpolities may "skip a level" if there aren't
very many. For example, there are only 26 subprefectures of Japan (14 under Hokkaido and 12 more scattered under five
other prefectures). Rather than have e.g. [[:Category:en:Subprefectures of Kagoshima Prefecture]] containing at most
two entries and [[:Category:en:Subprefectures of Miyazaki Prefecture]] containing at most one, they are all grouped
under the so-called "skip-subpolity category" [[:Category:en:Subprefectures of Japan]].
# City categories are always under e.g. [[:Category:Cities in the United States]] (e.g. [[:Category:New York City]] is
so-placed, even though [[:Category:Cities in New York, USA]] exists). However, they may have a second, more-specific
parent (e.g. [[:Category:Cities in New York, USA]] in the case of New York City). The city entries themselves will
go under the more specific parent if it exists.
# Immediate placetype categories for second-level divisions of a country generally have, respectively, a
"toponym parent" that is the toponym mentioned in the category and a "skip-polity parent" that groups all subpolity
placetype categories of a specific type and containing polity. For example, [[:Category:Counties of Arizona, USA]] has
toponym parent [[:Category:en:Arizona, USA]] and skip-polity parent [[:Category:en:Counties of the United States]].
Sometimes the default skip-polity parent is overridden or disabled entirely. For example, in the US, most states are
divided into counties but Louisiana is divided into parishes and Alaska into boroughs. It would make no sense to put
[[:Category:Parishes of Louisiana, USA]] under [[:Category:Parishes of the United States]] (which would only have one
subcategory), so we include them under [[:Category:Counties of the United States]]. An alternative would be to name
the skip-polity category to explicitly include parishes and boroughs; this would get awkward here but is done in some
cases. Similarly, [[:Category:Regional county municipalities of Quebec]] is placed under
[[:Category:Regional municipalities of Canada]] since that name is used in other provinces. Meanwhile,
[[:Category:Regional districts of British Columbia]] disables its skip-polity category since no other province or
territory of Canada has regional districts or comparable subpolities under a different name (an alternative would be
to place them under [[:Category:Counties of Canada]], since they are sort of comparable to counties).
# Placetype categories for first-level divisions of a country similarly (e.g. [[:Category:States of the United States]])
have a toponym parent (in this case [[:Category:United States]]), but in place of the skip-polity parent they have two
other parents: a "bare placetype" parent (in this case [[:Category:States]]) and the "generic" parent
[[:Category:Political divisions of specific countries]]. (There is also a bare [[:Category:Political divisions]]
that groups "bare placetype" categories.) Skip-polity placetype categories for second-level divisions of a country
(e.g. [[:Category:Counties of the United States]]) work the same. Placetype categories for countries work likewise
except they are missing the generic parent.
===Place descriptions===
A given place description is defined internally in a table of the following form:
```{
placetypes = {"``placetype``", "``placetype``", ...},
holonyms = {
{ -- holonym object; see below
placetype = "``placetype``" or nil,
display_placename = "``placename``",
unlinked_placename = "``placename``",
langcode = "``langcode``" or nil,
no_display = BOOLEAN,
needs_article = BOOLEAN,
force_the = BOOLEAN,
affix_type = "``affix_type``" or nil,
pluralize_affix = BOOLEAN,
suppress_affix = BOOLEAN,
continue_cat_loop = BOOLEAN,
},
...
},
order = { ``order_item``, ``order_item``, ... }, -- (only for new-style place descriptions),
joiner = "``joiner_string``" or nil,
holonyms_by_placetype = {
``holonym_placetype`` = {"``placename``", "``placename``", ...},
``holonym_placetype`` = {"``placename``", "``placename``", ...},
...
},
}```
Holonym objects have the following fields:
* `placetype`: The canonicalized placetype if specified as e.g. `c/Australia`; nil if no slash is present (in which case
the placename in `display_placename` refers to raw text).
* `display_placename`: The placename or raw text, in the format to be displayed. Placename display aliases have already
been resolved. It is raw text if `placetype` is nil.
* `unlinked_placename`: Same as `display_placename` but with links and HTML removed.
* `langcode`: The language code prefix if specified as e.g. `c/fr:Australie`; otherwise nil.
* `no_display`: If true (holonym prefixed with !), don't display the holonym but use it for categorization.
* `needs_article`: If true, prepend an article if the placename needs one (e.g. `United States`).
* `force_the`: If true, always prepend the article `the`. Example use: holonym 'city:pref:the/Gold Coast', which gets
formatted as `(the) city of the [[Gold Coast]]`.
* `affix_type`: Type of affix to prepend (values `pref` or `Pref`) or append (values `suf` or `Suf`). The actual affix
added is the placetype (capitalized if values `Pref` or `Suf` are given), or its plural if
`pluralize_affix` is given. Note that some placetypes (e.g. `district` and `department`) have inherent
affixes displayed after (or sometimes before) them.
* `pluralize_affix`: Pluralize any displayed affix. Used for holonyms like `c:pref/Canada,US`, which displays as
`the countries of Canada and the United States`.
* `suppress_affix`: Don't display any affix even if the placetype has an inherent affix. Used for the non-last
placenames when there are multiple and a suffix is present, and for the non-first placenames when
there are multiple and a prefix is present.
* `continue_cat_loop`: If true (holonym used :also), continue producing categories starting with this holonym when
preceding holonyms generated categories.
Note that new-style place descs (those specified as a single argument using <<...>> to denote placetypes, placetype
qualifiers and holonyms) have an additional `order` field to properly capture the raw text surrounding the items
denoted in double angle brackets. The ``order_item`` items in the `order` field are objects of the following form:
```{
type = "``order_type``",
value = "STRING" or INDEX,
}```
Here, the ``order_type`` is one of `"raw"`, `"qualifier"`, `"placetype"` or `"holonym"`:
* `"raw"` is used for raw text surrounding `<<...>>` specs.
* `"qualifier"` is used for `<<...>>` specs without slashes in them that consist only of qualifiers (e.g. the spec
`<<former>>` in `<<former>> French <<colony>>`).
* `"placetype"` is used for `<<...>>` specs without slashes that do not consist only of qualifiers.
* `"holonym"` is used for holonyms, i.e. `<<...>>` specs with a slash in them.
For all types but `"holonym"`, the value is a string, specifying the text in question. For `"holonym"`, the value is a
numeric index into the `holonyms` field.
It should be noted that placetypes and placenames occurring inside the holonyms structure are canonicalized, but
placetypes inside the placetypes structure are as specified by the user. Stripping off of qualifiers and
canonicalization of qualifiers and bare placetypes happens later.
The information under `holonyms_by_placetype` is redundant to the information in holonyms but makes categorization
easier. The holonym placenames listed here already have category aliases applied.
For example, the call {{tl|place|en|city|s/Pennsylvania|c/US}} will result in the return value
```{
placetypes = {"city"},
holonyms = {
{ placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania" },
{ placetype = "country", display_placename = "United States", unlinked_placename = "United States" },
},
holonyms_by_placetype = {
state = {"Pennsylvania"},
country = {"United States"},
},
}```
Here, the placetype aliases `s` and `c` have been expanded into `state` and `country` respectively, and the placename
display alias `US` has been expanded into `United States`. `placetypes` is a list because there may be more than one. For
example, the call {{tl|place|en|city/and/municipality|p/[[Kwango]] Province|c/Congo}} will result in the return value
```
{
placetypes = {"city", "and", "municipality"},
holonyms = {
{ placetype = "province", display_placename = "[[Kwango]] Province", unlinked_placename = "Kwango Province" },
{ placetype = "country", display_placename = "Congo", unlinked_placename = "Congo" },
},
holonyms_by_placetype = {
province = {"Kwango Province"},
country = {"Congo"},
},
}```
Here, the `unlinked_placename` field has removed links from `display_placename`.
Each value in the key/value pairs of `holonyms_by_placetype` is likewise a list; e.g. the call
{{tl|place|en|city|s/Kansas|and|s/Missouri}} will return
```
{
placetypes = {"city"},
holonyms = {
{ placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" },
{ display_placename = "and", unlinked_placename = "and" },
{ placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" },
},
holonyms_by_placetype = {
state = {"Kansas", "Missouri"},
},
}
```
Note that in `get_cats()` (which runs after the display form has been generated), further changes to the holonym
structure are made to aid in categorization. For example, after `handle_category_implications()` and
`augment_holonyms_with_container()` are called, the above structure will look more like
```
{
placetypes = {"city"},
holonyms = {
{ placetype = "state", display_placename = "Kansas", unlinked_placename = "Kansas" },
{ placetype = "country", unlinked_placename = "United States" },
{ display_placename = "and", unlinked_placename = "and" },
{ placetype = "state", display_placename = "Missouri", unlinked_placename = "Missouri" },
{ placetype = "country", unlinked_placename = "United States" },
},
holonyms_by_placetype = {
state = {"Kansas", "Missouri"},
country = {"United States"}
},
}
```
===Category determination===
The algorithm to find the categories to which a given place belongs works off of a place description (which specifies
the entry placetype(s) and holonym(s); see above). If there are multiple place descriptions, each is processed
independently to generate categories. Likewise, if there are multiple entry placetypes in a given place description,
each is processed independently with all the holonyms of the description to generate categories. Furthermore, before
the category-generation algorithm runs, earlier steps have modified the holonyms of the place description (inserting
containing polities whenever possible; see the description above of `handle_category_implications()` and
`augment_holonyms_with_container()`).
Given a single entry placetype and a place description, the algorithm to generate categories processes holonyms from
left to right until it finds one that "matches" in that it produces one or more categories. At that point it attempts
to generate categories for all other holonyms in the place description of the same placetype. Normally, it then stops
processing holonyms, but if a holonym is marked using the `:also` modifier, the category generation process starts over
starting with that holonym (or the leftmost such remaining holonym, if there is more than one marked with `:also`).
This makes it possible, for example, to specify the description of a river that passes through two different types of
political divisions (e.g. Alberta and the Northwest Territories), or categorize a geographic region at both the
continent and country level, such as this:
<pre>
{{place|en|historical region|r/Eastern Europe|located in southeastern|c:also/Poland|*and western|c/Ukraine}}
</pre>
Here, `r/Eastern Europe` has a category implication that adds `cont/Europe` as a holonym directly after it, which
causes the page to be categorized into [[:Category:en:Geographic and cultural areas of Europe]]. The category generation
process would normally stop at this point, but the presence of `:also` causes it to restart with `c/Poland` and
generate the category [[:Category:en:Geographic and cultural areas of Poland]]. After doing this, it looks for other
holonyms of the same placetype as `c/Poland` (i.e. other countries), which causes it to process `c/Ukraine` and generate
the category [[:Category:en:Geographic and cultural areas of Ukraine]].
The category generation process works off of the `placetype_data` table, which specifies various properties for
placetypes, such as how to display a holonym of that placetype as well as how to categorize certain pages where the
{{tl|place}} call contains the specified placetype as an entry placetype. For example, the entry for `city-state` in
[[Module:place/placetypes]] might look like
```
["city-state"] = {
link = true,
category_link = "[[sovereign]] [[microstate]]s consisting of a single [[city]] and [[w:dependent territory|dependent territories]]",
has_neighborhoods = true,
class = "settlement",
["continent/*"] = {"City-states", "Cities", "မိူင်း", "မ်ိူငးတီႈၼႂ်း+++", "National capitals"},
default = {"City-states", "Cities", "Countries", "National capitals"},
},
```
Here, the keys specify, respectively:
# If `city-state` occurs as an entry placetype, link it to the corresponding Wiktionary entry (that is what `true` means
in `link = true`).
# Use the specified `category_link` text for categories such as [[:Category:City-states]].
# City-states are "city-like", i.e. they have neighborhoods; this controls the handling of entry placetypes such as
`neighborhood`, `district`, `area`, etc.
# City-states should be treated as settlements for determining how to handle the placetype `former city-state` and for
categorizing the bare category [[:Category:City-states]] and language-specific equivalents such as
[[:Category:en:City-states]].
# When the entry placetype `city-state` occurs along with a continent holonym, categorize into the specified categories
under `continent/*`. Here, `+++` stands for the holonym in question.
# When the entry placetype `city-state` occurs in any other context, categorize into the specified categories under
`default`.
It's important to realize that the only categorization keys under a given placetype entry that are specified
explicitly in [[Module:place/placetypes]] are certain wildcard keys such as `continent/*` above (i.e. containing a slash
followed by `*`) and the key `default`. All the remaining categorization happens through category handlers, based
on the information on known locations in [[Module:place/locations]]. For example, [[Module:place/locations]] has an
"England group" specified similarly to the following:
```
export.england_group = {
default_container = {key = "England", placetype = "constituent country"},
default_placetype = "county",
default_divs = {
"districts",
{type = "local government districts", cat_as = "districts"},
{
type = "local government districts with borough status",
cat_as = {"districts", "boroughs"},
},
{type = "boroughs", cat_as = {"districts", "boroughs"}},
"civil parishes",
},
default_british_spelling = true,
data = export.england_counties,
}
```
The `default_divs` key here specifies the divisions that exist for each of the counties listed under the `data` key
(unless the key overrides them). Here, the entry `{type = "boroughs", cat_as = {"districts", "boroughs"}}` directs the
category handler `political_division_cat_handler` in [[Module:place/placetypes]] (which is one of two category handlers that
run for all entry placetypes, along with `generic_place_cat_handler`) to categorize boroughs specified under any of the
counties listed under `data` as both districts and boroughs.
Now, the categorization process proceeds as follows, given an entry placetype and place description, which specifies a
set of holonyms (the code to do this is in `get_placetype_cats()`):
# First, look up the entry placetype and any equivalent placetypes in `placetype_data`, which is defined in
[[Module:place/placetypes]]. Note that the entry in `placetype_data` that specifies the placetype information that is used
to determine the category or categories may not directly correspond to the entry placetype as specified in the place
description. For example, if the entry placetype is `small town`, the placetype whose data is fetched will be `town`
since `small` is a recognized qualifier and there is no entry in `placetype_data` for `small town`. As another
example, if the entry placetype is `administrative capital`, the code will first look up `administrative capital` and
then look up `capital city`, which is where the category handler is found, because `administrative capital` specifies
`capital city` as its fallback.
# Then, iterate over holonyms from left to right, as described above. For each holonym, we proceed as follows:
## First, call `political_division_cat_handler` to check if the entry placetype and holonym match a division in the
`locations` data in [[Module:place/locations]], as in the example above. Note that when doing this, holonyms are
canonicalized so that e.g. `co/Bedfordshire` gets mapped to `county/Bedfordshire` (because there is an entry in
`placetype_aliases` in [[Module:place/placetypes]] that maps `co` to `county`) and `c/USA` gets mapped to
`country/United States` (because there is an entry in the location data for the list of countries that maps
`country/USA` to `country/United States` for both display and categorization purposes). This category handler, as
with all such handlers, is passed the entry placetype and holonym being processed, but is also passed the entire
place description, so it can look at other specified holonyms (particularly those that follow). It either returns
{nil} or a list of category specs (which are the actual categories minus the preceding language code).
## If `political_division_cat_handler` doesn't generate any categories, check if there is a category handler defined
using the `cat_handler` key for the entry placetype. If so, call it to generate the categories (if any).
## If the category handler returns {nil}, or there is no category handler, look for a ''wildcard key'' of the format
e.g. `country/*`, which matches any holonym of placetype `country`. If found, the value is a list of category specs,
which are processed as above.
## If we get this far without generating any categories, move to the next holonym.
## If we do generate any categories, process all other holonyms of the same placetype. For example, if the user says
{{tl|place|en|city|s/Kansas|and|s/Missouri}}, when we get to the holonym `s/Kansas`, we generate the category
[[:Category:en:Cities in Kansas, USA]]. This causes us to look for other holonyms of the same placetype `state`,
and process them accordingly, generating a category [[:Category:en:Cities in Missouri, USA]] as well. The same thing
happens in an invocation like {{tl|place|pl|river|c/Poland,Ukraine,Belarus}}.
# Once we generate categories for a holonym and any other holonyms of the same placetype, we normally stop processing
holonyms. But if a holonym has the `:also` modifier, we restart the left-to-right loop at that holonym. For example,
in the invocation {{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr/Northwest Territories}},
we will generate a category [[:Category:en:Rivers in Alberta, Canada]] as well as
[[:Category:en:Rivers in British Columbia, Canada]] (because British Columbia is of the same placetype as Alberta);
but no category will be generated for the Northwest Territories, which is of a different placetype. To fix this, write
{{tl|place|en|river|flowing through|p/Alberta|p/British Columbia|and the|terr:also/Northwest Territories}}. The use
of `:also` will cause holonym processing to resume at `Northwest Territories` after `Alberta` and `British Columbia` are processed, leading to
an additional category [[:Category:en:Rivers in the Northwest Territories, Canada]]. (The presence of `the` in this
last category is because `Northwest Territories` is a known location with a spec indicating that it should be preceded
by `the`; it has nothing to do with the raw text `and the` in the invocation.)
# Finally, if we process all holonyms and don't end up producing any categories, we check the entry placetype's data for
a `default` key. If found, it lists category specs, which are processed to generate categories. This is used, for
example, in the placetype `city-state`, as described above.
# It should be noted that the above process runs independently for each combination of entry placetype and place
description. Thus, for example, an invocation {{tl|place|en|city/and/county|s/Kansas,Missouri|c/USA}} will generate
categories for both cities and counties in both Kansas and Missouri.
# Two additional sources of categories are ''bare location'' categories and ''generic place'' categories. These
categories are added by appropriate calls in the outer function `get_cats`, which iterates over placetypes and place
descriptions, calling `get_placetype_cats` on each combination.
## Bare location categories are categories like [[:Category:Arizona, USA]] that are related-to categories containing
terms related to the specified location. The bare location code, for example, adds the term [[Arizona]], and its
equivalents in other languages, to [[:Category:Arizona, USA]]. When looking for terms to consider, it checks the
pagename, the glosses specified using {{para|t}}, and the terms specified using {{para|modern}}, {{para|short}} and
{{para|full}}. It looks to see if any of these parameters match any known locations, but only adds them to a bare
location category if (a) the specified entry placetype matches, so that for example Russian `[[Джорджия]]` goes into
[[:Category:Georgia, USA]] while `[[Грузия]]` goes into [[:Category:Georgia]] (the country), even though both have a
gloss `Georgia`; and (b) there are no conflicting holonyms, so that for example the Old English term [[Munucceaster]]
if defined similarly to {{tl|place|ang|city|in modern|cc/England|t=Newcastle}} won't get added to
[[:Category:Newcastle, New South Wales]] (even though it is also a city) because the latter city is known to be in
Australia, which conflicts with the country `United Kingdom` (added internally to the Old English place description
through the holonym augmentation process, based on the holonym `cc/England`).
## Generic place categories are categories like [[:Category:Places in Kansas, USA]] and [[:Category:Places in England]]
that contain places of arbitrary placetype. These are added through a special category handler that operates like
other category handlers but is run for all placetypes, rather than only for the specified one(s).
]==]
--[=[
TODO/FIXME:
1. Neighborhoods should categorize at the city level. Categories like [[:Category:Places in Los Angeles]] exist but
not [[:Category:Neighborhoods in Los Angeles]]; we can refactor the code in generic_cat_handler() to support this
use case. [DONE]
2. Display handlers should be smarter. For example, 'co/Travis' as a holonym should display as 'Travis County' in the
United States, but (I think) display handlers don't currently have the full context of holonyms passed in to allow
this to happen.
3. Connected to this, we have various display handlers that add the name of the holonym after or (sometimes) before the
placename if it's not already there. An example is the county_display_handler() in [[Module:place/placetypes]], which adds
"County" before Ireland and Northern Ireland counties and after Taiwan and Romania counties. This should be
integrated into the polity group for these respective polities through a setting rather than requiring a separate
handler that has special casing for various polities.
4. Placetypes for toponyms should also have display handlers rather than just fixed text. This should allow us to
dispense with the need for special types for "fpref" = "French prefecture" (which displays as "prefecture" but links
to the appropriate Wikipedia article on French prefectures, which are completely different from the more general
concept of prefecture). Similarly for "Polish colony" and "Welsh community". ("Israeli settlement" should probably
stay as-is because it displays as "Israeli settlement" not just "settlement".)
5. Currently, categories for e.g. states and territories of Australia go into
[[:Category:States and territories of Australia]] but terms for states and territories of Australia go into
(respectively) [[:Category:States of Australia]] and [[:Category:Territories of Australia]]. We should fix this;
maybe this is as easy as setting cat_as in the respective divs definitions. [DONE]
6. Probably cat_as should support raw categories as well as category types; raw categories would be indicated by being
prefixed with "Category:".
7. Update documentation. [PARTLY DONE]
8. Rename remaining political division categories to include name of country in them. [DONE]
9. Add Pakistan provinces and territories. [DONE]
10. Add a polity group for continents and continent-level regions instead of special-casing. This should make it
possible e.g. to have Jerusalem as a city under "Asia". [DONE]
11. Add better handling of cities that are their own states, like Mexico City. [DONE]
12. Breadcrumb for e.g. [[Category:Aguascalientes, Mexico]] is "Aguascalientes, Mexico" instead of just
"Aguascalientes". [DONE]
13. Unify aliasing system; cities have a completely different mechanism (alias_of) vs. polities/subpolities (which use
`placename_cat_aliases` and `placename_display_aliases` in [[Module:place/placetypes]]). [DONE]
14. More generally, cities should be unified into the polity grouping system to the extent possible; this would allow
for divs of cities (see #17 below). [DONE]
15. We have `no_containing_polity_cat` set for Lebanon, Malta and Saudi Arabia to prevent country-level implications
from being added due to generically-named divisions like "North Governorate", "Central Region" and
"Eastern Province" but (a) this setting seems to do multiple things and should be split, (b) it should be possible
to set this at the division level instead of the country level. [DONE]
16. Split out the data from the handlers so we can use loadData() on the data because it's becoming very big.
17. Cities like Tokyo have special wards; "prefecture-level cities" like Wuhan (which aren't really cities but we treat
them as such) have districts, subdistricts, etc. We need to support divs for cities and even named divisions of
cities (such as we already have for boroughs of New York City). [DONE]
18. It should be allowed to set 'true' to any qualifier (which links it) and have it work correctly; qualifier lookup
in [[Module:place]] needs to remove links first.
19. Categories 'Historical polities' and 'Historical political subdivisions' should be renamed 'Former ...' since
"historic(al)" is ambiguous (cf. "historic counties" in England which are not former, but still have a legal
definition). [DONE]
20. It should be possible to categorize former subpolities of certain polities; cf. [[:Category:ja:Provinces of Japan]],
which contains former provinces. [PARTLY DONE; SUPPORT IS THERE BUT FORMER PROVINCES NOT YET CATEGORIZED]
21. In subpolity_keydesc(), we need to generate the correct indefinite article and have a huge hack to check
specifically for "union territory", which is the only placetype that shows up in this function where the default
indefinite article generating function fails. To fix this properly, we need to separate out the non-category
placetype data from `cat_data` in [[Module:place/placetypes]] and move it to [[Module:place/locations]], because we
don't have access to the data in [[Module:place/placetypes]], and that data indicates the correct article for placetypes
like "union territory". [DONE]
22. Simplify the specs in `cat_data`, eliminating the distinction between "inner" and "outer" matching. There should not
be two levels, just one. For example, in "district", instead of
["country/Portugal"] = {
["itself"] = {"Districts and autonomous regions of +++"},
}
we should just have
["country/Portugal"] = {"Districts and autonomous regions of +++"},
And in "dependent territory", instead of
["default"] = {
["itself"] = {true},
["country"] = {true},
},
we should just have
["itself"] = {true},
["country/*"] = {true},
It appears the only remaining spec that can't be easily converted in this fashion is for "subdistrict":
["country/Indonesia"] = {
["municipality"] = {true},
},
This seems to be specifically for Jakarta and doesn't seem to work anyway, as the two entries in
[[:Category:en:Subdistricts of Jakarta]] and the one entry in [[:Category:id:Subdistricts of Jakarta]] are manually
categorized. [DONE]
23. Consolidate the remaining stuff in [[Module:category tree/topic cat/data/Earth]] into
[[Module:category tree/topic cat/data/Places]]. [DONE]
24. The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are
in different polities from the specified containing polity/polities of the city, but doesn't do the same for
larger-level divisions. Likewise for the `city_type_cat_handler`. There are some sufficiently generically-named
divisions that this issue can occur; for example, [[Koforidua]], the capital city of Eastern Region, Ghana, is
incorrectly categorized under [[:Category:en:Cities in Eastern Region, Malta]] and
[[:Category:en:Places in Eastern Region, Malta]]. Note that the function `augment_holonyms_with_container`
''DOES'' do such checks, so we should be able to refactor the code out of that function and use it elsewhere. [DONE]
25. The `generic_cat_handler` that categorizes into `Places in FOO` is smart enough not to categorize cities that are
in different polities from the specified containing polity/polities of the city; but how smart is it? It will
successfully avoid categorizing a neighborhood in e.g. [[Columbus]], [[Georgia]] that doesn't explicitly mention the
US (only `s/Georgia`) into [[:Category:en:Places in Columbus]], which is for Columbus, Ohio, but will it do the same
for a hypothetical neighborhood of Columbus in say Merseyside, England? This should be investigated. It will
probably work for a hypothetical Columbus in [[Canada]] because `augment_holonyms_with_container` would
auto-add Canada as an additional holonym once say `p/Ontario` is mentioned, but I think there's a setting preventing
this augmentation from happening for the UK. (This relates to FIXME #15. `no_containing_polity_cat` is set on
England, Scotland, etc. to prevent the toponyms from being added to [[:Category:en:Places in the United Kingdom]],
but this same setting is used to prevent augmentation, which it should not be; there should be different settings.)
[DONE]
26. The `generic_cat_handler` (or more specifically `find_holonym_keys_for_categorization`) checks for city holonyms
by looking specifically for holonym type `city`. But some cities (particularly those in China) can be specified
using different holonym types, e.g. `prefecture-level city`, `subprovincial city`, etc. We should allow these when
appropriate (which means the cities in China need to have a `placetype` set that indicates their regional-level
status as well as just `city`). I'm not sure if cities support specifying a custom `placetype` at the moment; this
relates to FIXME #14 above concerning unifying cities and political divisions internally. [DONE]
27. The bare category handler (`get_bare_categories` in [[Module:place/placetypes]]) is not smart enough to avoid
overcategorizing cities or other divisions that are of the right placetype but in the wrong containing polity. For
example, Asturian [[Llión]] "León (city in Spain)" gets put in [[:Category:ast:León]] even though the latter is
supposed to refer to a city in Mexico. We can borrow the check-containing-polity code from `generic_cat_handler`.
[DONE]
28. Redo handling of singular and plural to respect overrides specified in placetype_data. Check more carefully for
things that may not singularize correctly, e.g. 'passes' -> 'passe'? Definitely 'headquarters' and variants. [DONE]
29. Combine placetype_equivs and other placetype data into `placetype_data`. Figure out if we need the distinction
between `placetype_equivs` and `fallback`. [DONE]
30. `has_neighborhoods` may need to be a function that can look at the containing holonyms to determine whether the
entity in question is city-like.
31. Bare placenames as they appear in holonyms (e.g. `Riau Islands`) instead of category keys (e.g.
`the Riau Islands, Indonesia`) should appear in the polity data tables. As a first pass, the word "the" should not
appear but should instead be a property of the polity. [DONE]
32. `capital_city_cat_handler` should use `get_holonyms_to_check()`. [DONE]
33. The code to generate and parse the correct preposition ("in" or "of") is very convoluted, and the actual preposition
used is specified in various locations with various defaults, sometimes hardcoded. This should be simplified. It is
made more difficult by the fact that the in/of distinction occurs in several places:
(a) when generating the {{place}} text in old-style descriptions where the preposition isn't explicitly given, which
uses the `preposition` setting in placetype_data, defaulting to "in";
(b) when generating categories based on explicit category specs in placetype_data (which are gradually being
deprecated), which likewise uses the `preposition` setting in placetype_data, defaulting to "in";
(c) when generating categories based on political_division_cat_handler, originating in the `divs` placetypes for
specific known locations in [[Module:place/locations]], which uses the `prep` setting embedded in the `divs`
specifications, defaulting to "of";
(d) when generating categories based on category handlers specified using the `cat_handler` property of entries in
placetype_data, which tend to hardcode "in" or "of" depending on the specific category handler;
(e) when generating category descriptions in [[Module:category tree/topic/Places]] for `divs` categories generated
in (c), which (correctly) uses the same `prep` setting embedded in the `divs` settings that is used when
generating the categories themselves;
(f) when generating category descriptions for categories generated in (b) and (d) above, which relies on the
`generic_before_non_cities` and `generic_before_cities` settings in placetype_data, which need to match the
corresponding prepositions hardcoded in the category generation handlers. Instead of the hardcoding, the
category generation handler should respect the `generic_before_*` settings. [PARTLY DONE]
34. [[Krakow]] defined as {{place|en|A <<city>> on the [[Vistula]] River, the <<capital>> of the <<voi/Lesser Poland Voivodeship>> in southern <<c/Poland>>}}
categorizes under [[:Category:Voivodeship capitals]] when it should probably instead be under
[[:Category:Voivodeship capitals of Poland]]. Possibly this is because the various voivodeships haven't yet been
entered as known locations, but this should happen regardless of that.
35. {{tcl}} bugs:
a. Lowercase initial letter in new-style {{place}} descriptions in {{tcl}}. Maybe we can have a setting tcl_nolc=1
to prevent this from happening.
b. tcl= and probably new-style {{place}} descriptions in general should recognize ;; to separate distinct {{place}}
descriptions, and similarly ;;and as the equivalent of regular `;and`, etc.
c. The value supplied in `modern=` should be displayed in {{tcl}} descriptions regardless of the setting that
normally disables this, so that e.g. the foreign-language equivalent of [[British Honduras]] doesn't just say
it's a former British colony in Central America but specifically identifies it as modern Belize. If the user
gives place_modern= in {{tcl}}, that should override the modern= value and still display.
d. The page supplied to {{tcl}} should be used for generating bare categories even if t= is supplied and overrides
the English term displayed. [DONE]
36. County boroughs used as holonyms currently display 'borough county borough' because there's an affix setting for
'county borough' and a fallback display handler for 'borough'. We need to rethink this; maybe merge the affix
setting and display handlers.
37. Implement known-location groups and specs in a more standardly object-oriented way using metatables.
38. Implement caching of known location lookup in the holonym. This may have to be keyed by placetype, but we can have a
special field for when the lookup placetype is the same as the user-specified placetype of the holonym. Use this
known location in place of looking up known locations and store the appropriate known location there in
`augment_holonyms_with_container()` instead of calling `key_to_placename`.
39. Bug fixes with 'the': [[Kazaň]] defined as {{place|cs|caplc|rep:Pref/Tatarstan|c/Russia|t1=Kazan}} displays as
"Republic of the Tatarstan". Possibly related: [[Valday]] defined as
{{place|en|town/administrative center|dist:Suf/Valdaysky|obl/Novgorod|c/Russia}} displays as "a town, the
administrative center of the Valdaysky District".
40. Bug fix with 'the': [[Verkhoyansk]] defined as {{place|en|town|rep/Sakha|c/Russia}} displays as "a town in the
Sakha". [DONE]
41. [[Category:Cities in Asia]] has [[Category:Cities in Eurasia]] as a parent, which in turn has
[[Category:Cities in the Earth]] as a parent. Continents should not have the second parent like this. [DONE]
42. When checking `british_spelling`, it should check all containers as well; otherwise it's too hard to keep this in
sync across cities, administrative divisions and countries. [DONE]
43. `skip_polity_parent_type` should be renamed to container_parent_type or similar. [DONE]
44. There should be a flag to allow e.g. departments of France that are currently categorized as departments of their
region to also be categorized as departments of France.
45. Aliases are causing iterate_matching_holonym_location() to fail, e.g. if [[براق]] "Prague" is specified as
{{place|acw|capital city|c/Czechia|t1=Prague}}, this fails to add a bare category [[Category:acw:Prague]] because
the code in iterate_matching_holonym_location() isn't resolving aliases when comparing the known container
'Czech Republic'. Probably we want to build an alias table to speed up these sorts of lookups. [DONE]
46. The district cat handler is failing to work right, e.g. in [[Saint-Gaudérique]] defined as
{{place|fr|district|city/Perpignan|in|dept/Pyrénées-Orientales|r/Occitania|c/France|t=Saint-Gaudérique}},
only the 'Places in ...' categories are getting triggered. [DONE; DUE TO TYPO IN HANDLER]
]=]
----------- Wikicode utility functions
-- Format `text` as a link in the language with code `langcode`, i.e. the equivalent of {{l|LANGCODE|TEXT}}. `id`, if
-- given, is passed through as the `id` field of the link data. If `langcode` is nil, `text` is returned unchanged.
local function link(text, langcode, id)
	if langcode then
		local lang = require(languages_module).getByCode(langcode, true, "allow etym")
		local term_data = {term = text, lang = lang, id = id}
		return m_links.full_link(term_data, nil, "allow self link")
	end
	return text
end
---------- Basic utility functions
-- Record the current page under the tracking "category" `place/PAGE`. To see the pages in the "category", go to
-- [[Wiktionary:Tracking/place/PAGE]] and click on "What links here". Always returns true.
local function track(page)
	local tracking_key = "place/" .. page
	require(debug_track_module)(tracking_key)
	return true
end
-- Apply `m_strutils.ucfirst` to every space-separated word of `text` and return the rejoined string.
local function ucfirst_all(text)
	local ucfirst = m_strutils.ucfirst
	if not text:find(" ") then
		-- Only one word, so there is nothing to split.
		return ucfirst(text)
	end
	local words = split(text, " ", true)
	for idx = 1, #words do
		words[idx] = ucfirst(words[idx])
	end
	return concat(words, " ")
end
-- Lowercase `text` using the `lc` method of the wiki's content language object.
local function lc(text)
	local content_lang = mw.getContentLanguage()
	return content_lang:lc(text)
end
---------- Argument parsing functions and utilities
-- Split an argument on comma, but not comma followed by whitespace. The splitting itself is delegated to
-- `split_on_comma` in [[Module:parse interface]], which is only loaded when `val` actually contains a comma; otherwise
-- a one-element list containing `val` is returned.
local function split_on_comma(val)
	if not val:find(",") then
		return {val}
	end
	return require(parse_interface_module).split_on_comma(val)
end
-- Split an argument on slash, but not slash occurring inside of HTML tags like </span> or <br />. Returns a list of
-- strings.
local function split_on_slash(arg)
	if not arg:find("<") then
		-- No angle brackets, so a plain split is enough.
		return split(arg, "/", true)
	end
	local parse_utils = require(parse_utilities_module)
	-- Parse balanced <...> runs and split on slash only in the text outside of them. Each resulting group is a list
	-- of string pieces, which we glue back together in place.
	local runs = parse_utils.parse_balanced_segment_run(arg, "<", ">")
	local groups = parse_utils.split_alternating_runs(runs, "/")
	for idx, pieces in ipairs(groups) do
		groups[idx] = concat(pieces)
	end
	return groups
end
-- Implement "implications", i.e. where the presence of a given holonym causes additional holonym(s) to be added.
-- Implications apply only to categorization. (There used to be support for "general implications" that also affected
-- display, but no such implications ended up existing, so the support was removed. Such implications are a bad idea
-- anyway: the user might purposely leave out a higher-level polity to avoid redundancy in several successive
-- definitions, and we wouldn't want to override that.) In practice this mechanism is used for non-administrative
-- geographic regions such as Eastern Europe and the West Bank; there is a similar mechanism for administrative regions
-- handled by `augment_holonyms_with_containing_polity` in [[Module:place/placetypes]]. NOTE(review): other comments in
-- this file call that function `augment_holonyms_with_container`; confirm the current name.
--
-- `place_descriptions` is a list of place descriptions (see top of file, collectively describing the data passed to
-- {{place}}). `implication_data` is a table indexed by holonym placetype, each value of which is a table indexed by
-- holonym placename, each value of which is a list of "PLACETYPE/PLACENAME" holonyms to add directly after the
-- triggering holonym. Each description's `holonyms` list is replaced with the augmented list; nothing is returned.
local function handle_category_implications(place_descriptions, implication_data)
	for _, desc in ipairs(place_descriptions) do
		local existing_holonyms = desc.holonyms
		if existing_holonyms then
			local augmented = {}
			for _, holonym in ipairs(existing_holonyms) do
				insert(augmented, holonym)
				-- Look for implications under the holonym's placetype or any equivalent placetype.
				local implied_specs = m_placetypes.get_equiv_placetype_prop(holonym.placetype, function(pt)
					local by_placename = implication_data[pt]
					local implication = by_placename and by_placename[holonym.unlinked_placename]
					if not implication then
						return
					end
					return implication
				end)
				if implied_specs then
					for _, implied_spec in ipairs(implied_specs) do
						local parts = split_on_slash(implied_spec)
						if #parts ~= 2 then
							internal_error("Invalid holonym in implications: %s", implied_spec)
						end
						-- By the time we run, the display has already been generated so we don't need to set
						-- display_placename.
						local implied_holonym = {
							placetype = parts[1],
							unlinked_placename = parts[2],
						}
						insert(augmented, implied_holonym)
						m_placetypes.key_holonym_into_place_desc(desc, implied_holonym)
					end
				end
			end
			desc.holonyms = augmented
		end
	end
end
-- Split a holonym placename on commas but don't split on comma+space. This way, "Poland,Belarus,Ukraine" becomes three
-- placenames while "Tucson, Arizona" stays together as one. Returns a list of strings.
local function split_holonym_placename(placename)
	local has_comma_space = placename:find(", ") ~= nil
	local pieces = split(placename, ",", true)
	if not has_comma_space then
		return pieces
	end
	-- Some comma was followed by a space; glue any piece beginning with a space back onto its predecessor.
	local merged = {}
	for idx, piece in ipairs(pieces) do
		if idx == 1 or piece:sub(1, 1) ~= " " then
			insert(merged, piece)
		else
			merged[#merged] = merged[#merged] .. "," .. piece
		end
	end
	return merged
end
-- Split a raw holonym spec (e.g. "continent/Europe" or "country/en:Italy" or "in southern" or "r:suf/O'Higgins" or
-- "c/Austria,Germany,Czech Republic") into its components, returning a list of holonym objects (see top of file).
--
-- * A leading "!" sets `no_display` and a following "*" sets `suppress_comma` on every returned holonym.
-- * If there is no slash (e.g. "in southern"), a single holonym with only `display_placename` (and the two flags) is
--   returned; its `placetype` is nil.
-- * Otherwise the text before the first slash is the placetype, optionally followed by colon-separated modifiers
--   ('pref', 'Pref', 'suf', 'Suf', 'noaff', 'also', 'the'), and the rest is one or more comma-separated placenames,
--   each optionally prefixed by a language code and a colon.
-- * Placetype aliases (e.g. "r" for "region") and placename display aliases are resolved through
--   [[Module:place/placetypes]].
--
-- Throws an error on a duplicated or unrecognized modifier.
local function split_holonym(raw)
	-- Peel off the optional leading flags, "!" first and then "*".
	local no_display = raw:sub(1, 1) == "!"
	local combined_holonym = no_display and raw:sub(2) or raw
	local suppress_comma = combined_holonym:sub(1, 1) == "*"
	if suppress_comma then
		combined_holonym = combined_holonym:sub(2)
	end

	local slash_parts = split_on_slash(combined_holonym)
	if #slash_parts == 1 then
		-- `unlinked_placename` should not be used.
		return {{display_placename = combined_holonym, no_display = no_display, suppress_comma = suppress_comma}}
	end

	-- Rejoin further slashes in case of slash in holonym placename, e.g. Admaston/Bromley.
	local combined_placename = concat(slash_parts, "/", 2)

	-- Check for modifiers after the holonym placetype.
	local placetype_parts = split(slash_parts[1], ":", true)
	local affix_modifiers = {pref = true, Pref = true, suf = true, Suf = true, noaff = true}
	local affix_type, saw_also, saw_the
	for i = 2, #placetype_parts do
		local modifier = placetype_parts[i]
		if modifier == "also" then
			if saw_also then
				error(("Modifier ':also' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_also = true
		elseif modifier == "the" then
			if saw_the then
				error(("Modifier ':the' occurs twice in holonym '%s'"):format(combined_holonym))
			end
			saw_the = true
		elseif affix_modifiers[modifier] then
			if affix_type then
				error(("Affix-type modifier ':%s' occurs twice in holonym '%s'"):format(modifier, combined_holonym))
			end
			affix_type = modifier
		else
			error(("Unrecognized holonym placetype modifier '%s', should be one of " ..
				"'pref', 'Pref', 'suf', 'Suf', 'noaff', 'also' or 'the'"):format(modifier))
		end
	end
	local placetype = m_placetypes.resolve_placetype_aliases(placetype_parts[1])

	local placenames = split_holonym_placename(combined_placename)
	local num_placenames = #placenames
	local pluralize_affix = num_placenames > 1
	-- Only one holonym of a multi-placename spec carries the affix: the first for a prefix, none (index 0) for
	-- 'noaff', and otherwise the last.
	local affix_holonym_index
	if affix_type == "pref" or affix_type == "Pref" then
		affix_holonym_index = 1
	elseif affix_type == "noaff" then
		affix_holonym_index = 0
	else
		affix_holonym_index = num_placenames
	end

	local holonyms = {}
	for i, placename in ipairs(placenames) do
		-- Check for langcode before the holonym placename, but don't get tripped up by Wikipedia links, which begin
		-- "[[w:...]]" or "[[wikipedia:]]".
		local langcode, bare_placename = rmatch(placename, "^([^%[%]]-):(.*)$")
		if langcode then
			placename = bare_placename
		end
		placename = m_placetypes.resolve_placename_display_aliases(placetype, placename)
		local carries_affix = i == affix_holonym_index
		holonyms[i] = {
			placetype = placetype,
			display_placename = placename,
			unlinked_placename = m_placetypes.remove_links_and_html(placename),
			langcode = langcode,
			affix_type = carries_affix and affix_type or nil,
			pluralize_affix = carries_affix and pluralize_affix,
			suppress_affix = not carries_affix,
			no_display = no_display,
			suppress_comma = suppress_comma,
			continue_cat_loop = saw_also,
			force_the = i == 1 and saw_the,
		}
	end
	return holonyms
end
--[==[
Parse a "new-style" place description, with placetypes and holonyms surrounded by `<<...>>` amid otherwise raw text.
Return value is an object as documented at the top of the file, with the following fields set here:
* `holonyms`: the list of holonym objects found in `<<...>>` segments containing a slash;
* `placetypes`: the list of entry placetypes found in the remaining `<<...>>` segments, where a segment consisting only
  of qualifiers is merged (space-separated) into the placetype segment that follows it;
* `order`: the list of display items in order, each of the form `{type = TYPE, value = VALUE}` where TYPE is `"raw"`,
  `"holonym"` (VALUE is an index into `holonyms`), `"qualifier"` or `"placetype"`.
Exported for use by [[Module:demonyms]].
]==]
function export.parse_new_style_place_desc(text)
	local retval = {holonyms = {}, order = {}}
	local seen_placetypes = {}

	local function add_to_order(item_type, value)
		insert(retval.order, {type = item_type, value = value})
	end

	-- Splitting on a pattern with a capture yields alternating raw text (odd indices) and <<...>> contents (even
	-- indices).
	for i, segment in ipairs(split(text, "<<(.-)>>")) do
		if i % 2 == 1 then
			add_to_order("raw", segment)
		elseif segment:find("/") then
			local holonyms = split_holonym(segment)
			local num_holonyms = #holonyms
			for j, holonym in ipairs(holonyms) do
				if j > 1 then
					if not holonym.no_display then
						add_to_order("raw", j == num_holonyms and " and " or ", ")
					end
					-- All but the first in a multi-holonym need an article. For the first one, the article is
					-- specified in the raw text if needed. (Currently, needs_article is only used when displaying the
					-- holonym, so it wouldn't matter when no_display is set, but we set it anyway in case we need it
					-- for something else.)
					holonym.needs_article = true
				end
				insert(retval.holonyms, holonym)
				if not holonym.no_display then
					add_to_order("holonym", #retval.holonyms)
				end
				m_placetypes.key_holonym_into_place_desc(retval, holonym)
			end
		else
			-- A placetype segment is either PLACETYPE or TREAT_AS:DISPLAY.
			local placetype, display = segment:match("^(..-):(.+)$")
			if not placetype then
				placetype, display = segment, segment
			end
			-- see if the placetype segment is just qualifiers
			local only_qualifiers = true
			for _, word in ipairs(split(placetype, " ", true)) do
				if m_placetypes.placetype_qualifiers[word] == nil then
					only_qualifiers = false
					break
				end
			end
			insert(seen_placetypes, {placetype = placetype, only_qualifiers = only_qualifiers})
			add_to_order(only_qualifiers and "qualifier" or "placetype", display)
		end
	end

	-- Attach each placetype to the qualifier-only segment directly preceding it, if there is one.
	local final_placetypes = {}
	for i = 1, #seen_placetypes do
		local entry = seen_placetypes[i]
		if i > 1 and seen_placetypes[i - 1].only_qualifiers then
			local last = #final_placetypes
			final_placetypes[last] = final_placetypes[last] .. " " .. entry.placetype
		else
			insert(final_placetypes, entry.placetype)
		end
	end
	retval.placetypes = final_placetypes
	return retval
end
--[=[
Process numeric args (except for the language code in 1=). `numargs` is a list of the numeric arguments passed to
{{place}} starting from 2=. The return value is a list of one or more place description objects, as described in the
long comment at the top of the file.

An argument that is `;` or begins with `;` followed by a non-space character ends the current place description and
sets its `joiner`. An argument containing `<<` is parsed as a complete new-style place description. Any other argument
is old-style: the first one in a description gives the slash-separated entry placetypes and the following ones are
holonyms.

Throws an error if a semicolon joiner occurs before any place description, or if old-style arguments directly follow
a new-style place description.
]=]
local function parse_place_descriptions(numargs)
	local descs = {}
	local this_desc
	-- Index of separate (semicolon-separated) place descriptions within `descs`.
	local desc_index = 1
	-- Index of separate holonyms within a place description. 0 means we've seen no holonyms and have yet to process
	-- the placetypes that precede the holonyms. 1 means we've seen no holonyms but have already processed the
	-- placetypes.
	local holonym_index = 0
	-- "new" or "old" depending on the style of the arguments seen so far in the current description; nil if none.
	local place_desc_style

	for _, arg in ipairs(numargs) do
		if arg == ";" or arg:find("^;[^ ]") then
			if not this_desc then
				error("Saw semicolon joiner without preceding place description")
			end
			if arg == ";" then
				this_desc.joiner = "; "
				this_desc.include_following_article = true
			elseif arg == ";;" then
				this_desc.joiner = " "
			else
				local joiner = arg:sub(2)
				-- A joiner beginning with a letter (e.g. ";and") gets a space on both sides; anything else (e.g.
				-- punctuation) only gets a following space.
				if rfind(joiner, "^%a") then
					this_desc.joiner = " " .. joiner .. " "
				else
					this_desc.joiner = joiner .. " "
				end
			end
			desc_index = desc_index + 1
			holonym_index = 0
			place_desc_style = nil
		elseif arg:find("<<") then
			if place_desc_style and place_desc_style ~= "new" then
				-- error("New-style place description cannot directly follow old-style arguments")
				-- There may be several of these; track and convert before making an error
				track("new-after-old")
			end
			place_desc_style = "new"
			if holonym_index > 0 then
				-- Something already occupies the current slot, so start a new place description.
				desc_index = desc_index + 1
				holonym_index = 0
			end
			this_desc = export.parse_new_style_place_desc(arg)
			descs[desc_index] = this_desc
			holonym_index = holonym_index + 1
		else
			if place_desc_style and place_desc_style ~= "old" then
				error("Old-style arguments cannot directly follow new-style place description")
			end
			place_desc_style = "old"
			if holonym_index == 0 then
				local entry_placetypes = split_on_slash(arg)
				this_desc = {placetypes = entry_placetypes, holonyms = {}}
				descs[desc_index] = this_desc
				holonym_index = holonym_index + 1
			else
				local holonyms = split_holonym(arg)
				for j, holonym in ipairs(holonyms) do
					if j > 1 then
						-- All but the first in a multi-holonym need an article. Not for the first one because e.g.
						-- {{place|en|city|s/Arizona|c/United States}} should not display as "a city in Arizona, the
						-- United States". The first holonym given gets an article if needed regardless of our setting
						-- here.
						holonym.needs_article = true
						-- Insert "and" before the last holonym.
						if j == #holonyms then
							this_desc.holonyms[holonym_index] = {
								-- Use the no_display value from the first holonym; it should be the same for all
								-- holonyms. `unlinked_placename` should not be used.
								display_placename = "and", no_display = holonyms[1].no_display
							}
							holonym_index = holonym_index + 1
						end
					end
					this_desc.holonyms[holonym_index] = holonym
					m_placetypes.key_holonym_into_place_desc(this_desc, this_desc.holonyms[holonym_index])
					holonym_index = holonym_index + 1
				end
			end
		end
	end

	-- Tracking code. This does nothing but add tracking for seen placetypes and qualifiers. The place will be linked to
	-- [[Wiktionary:Tracking/place/entry-placetype/PLACETYPE]] for all entry placetypes seen; in addition, if PLACETYPE
	-- has qualifiers (e.g. 'small city'), there will be links for the bare placetype minus qualifiers and separately
	-- for the qualifiers themselves:
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/BARE_PLACETYPE]]
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/QUALIFIER]]
	-- Note that if there are multiple qualifiers, there will be links for each possible split. For example, for
	-- 'small maritime city', there will be the following links:
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/small maritime city]]
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/maritime city]]
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-placetype/city]]
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/small]]
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/entry-qualifier/maritime]]
	-- Finally, there are also links for holonym placetypes, e.g. if the holonym 'c/Italy' occurs, there will be the
	-- following link:
	--   [[Special:WhatLinksHere/Wiktionary:Tracking/place/holonym-placetype/country]]
	for _, desc in ipairs(descs) do
		for _, entry_placetype in ipairs(desc.placetypes) do
			local qualifier_splits =
				m_placetypes.split_qualifiers_from_placetype(entry_placetype, "no canon qualifiers")
			-- Each split is a list {PREV_QUALIFIER, THIS_QUALIFIER, BARE_PLACETYPE}; only the last two are tracked.
			for _, qualifier_split in ipairs(qualifier_splits) do
				local _, this_qualifier, bare_placetype = unpack(qualifier_split, 1, 3)
				track("entry-placetype/" .. bare_placetype)
				if this_qualifier then
					track("entry-qualifier/" .. this_qualifier)
				end
			end
		end
		for _, holonym in ipairs(desc.holonyms) do
			if holonym.placetype then
				track("holonym-placetype/" .. holonym.placetype)
			end
		end
	end

	return descs
end
-------- Definition-generating functions
-- Return a string with the wikilinks to the English translations of the word, separated by ", ". `transl` is the list
-- of translation arguments (t=, t2=, ...) and `ids` the corresponding sense-ID arguments (tid=, tid2=, ...), indexed
-- the same way. Each argument may hold several comma-separated values; if an ID argument is given, it must hold as
-- many values as the matching translation argument, or an error is thrown.
local function get_translations(transl, ids)
	local links = {}
	for i, t in ipairs(transl) do
		local arg_transls = split_on_comma(t)
		local raw_ids = ids[i]
		local arg_ids = raw_ids and split_on_comma(raw_ids) or nil
		if arg_ids and #arg_transls ~= #arg_ids then
			-- The first argument of each kind is unnumbered (t=, tid=).
			local argnum = i == 1 and "" or i
			error(("Saw %s translation%s in t%s=%s but %s ID%s in tid%s=%s"):format(
				#arg_transls, #arg_transls > 1 and "s" or "", argnum, t, #arg_ids,
				#arg_ids > 1 and "'s" or "", argnum, raw_ids))
		end
		for j = 1, #arg_transls do
			insert(links, link(arg_transls[j], "en", arg_ids and arg_ids[j] or nil))
		end
	end
	return concat(links, ", ")
end
-- Return the article ("the", or whatever is specified in `placename_article`) that should precede
-- `decorated_placename` when displayed, or nil if no article is needed. The underlying holonym object that generated
-- `decorated_placename` is found at `holonym_index` in the holonyms in `place_desc`. This is only called to format
-- the holonym for display. `decorated_placename` is the linked version of the holonym's display placename, possibly
-- modified due to a display handler and possibly with a placetype suffixed to the placename.
--
-- Returns nil if the holonym has no placetype or if the decorated placename (minus links and HTML) already begins with
-- "the ". Otherwise the following are checked in order:
-- 1. an explicit article in `placename_article`, or a matching known location that requests "the" (through `the` on
--    the location spec or `default_the` on its group), for the holonym's placetype or an equivalent placetype;
-- 2. the `holonym_use_the` setting of the placetype, unless the user explicitly specified an affix type;
-- 3. the universal (`*`) and placetype-specific patterns in `placename_the_re`, matched against the decorated
--    placename.
local function get_holonym_article(decorated_placename, place_desc, holonym_index)
	local holonym = place_desc.holonyms[holonym_index]
	local holonym_placetype = holonym.placetype
	if not holonym_placetype then
		return nil
	end
	local holonym_placename = holonym.unlinked_placename
	local unlinked_decorated_placename = m_placetypes.remove_links_and_html(decorated_placename)
	if unlinked_decorated_placename:find("^the ") then
		return nil
	end
	-- NOTE: This must be keyed on `holonym_placetype`. The code formerly passed the undefined name `placetype` here
	-- (i.e. nil), so the lookup was never based on the holonym's actual placetype.
	local art = m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(pt)
		local articles_for_pt = m_placetypes.placename_article[pt]
		local explicit_art = articles_for_pt and articles_for_pt[holonym_placename]
		if explicit_art then
			return explicit_art
		end
		-- Look for a known location. We should be using find_matching_holonym_location() but that function doesn't
		-- currently work without alias resolution. Instead we check if any matching location has `the = true` set.
		-- In practice there aren't any cases where a given placename matches two locations, only one of which has
		-- `the = true` set.
		for group, key, spec in m_placetypes.iterate_matching_location {
			placetypes = holonym_placetype,
			placename = holonym_placename,
			alias_resolution = "none",
		} do
			-- `iterate_matching_location` doesn't initialize the spec if alias resolution is turned off, so check
			-- both the spec and group. Be careful in case `the = false` is explicitly given by the spec.
			if spec.the ~= nil then
				if spec.the then
					return "the"
				end
			elseif group.default_the then
				return "the"
			end
		end
	end)
	if art then
		return art
	end

	if not holonym.affix_type then
		-- See if the placetype requests an article to be placed before the holonym. This occurs e.g. with 'department',
		-- which has the setting `affix_type = "suf"` placing the word "department" after the holonym, so that
		-- "dept/Gironde" correctly generates "the Gironde department". But if the user overrode the affix type and e.g.
		-- specified "dept:pref/Gironde", we'll wrongly get "the department of the Gironde", so in that case we need to
		-- ignore the holonym article specified along with the placetype. (NOTE: We have since turned off the
		-- `holonym_use_the` setting on 'department'.)
		local holonym_use_the = m_placetypes.get_equiv_placetype_prop(holonym_placetype,
			function(pt) return placetype_data[pt] and placetype_data[pt].holonym_use_the end)
		if holonym_use_the then
			return "the"
		end
	end

	-- Patterns that call for "the" regardless of placetype.
	local universal_res = m_placetypes.placename_the_re["*"]
	for _, re in ipairs(universal_res) do
		if unlinked_decorated_placename:find(re) then
			return "the"
		end
	end

	-- Patterns that call for "the" for this placetype or an equivalent one.
	local matched = m_placetypes.get_equiv_placetype_prop(holonym_placetype, function(pt)
		local res = m_placetypes.placename_the_re[pt]
		if not res then
			return nil
		end
		for _, re in ipairs(res) do
			if unlinked_decorated_placename:find(re) then
				return true
			end
		end
		return nil
	end)
	if matched then
		return "the"
	end
	return nil
end
-- Convert a holonym into display format. This adds wikilinks to holonyms and passes them through any display handlers,
-- which may (e.g.) add the placetype to the holonym. If `needs_article` is true, prepend the article `"the"` if the
-- holonym requires it (e.g. if the holonym is `United States`). `needs_article` is set to true when we are processing the
-- first specified holonym in an old-style place description (i.e. the holonym directly following the entry placetype,
-- with no raw-text holonym in between).
--
-- Examples:
-- ({placetype = "country", display_placename = "United States", unlinked_placename = "United States"}, true) returns
-- the template-expanded equivalent of "the {{l|en|United States}}".
-- ({placetype = "region", display_placename = "O'Higgins", unlinked_placename = "O'Higgins", affix_type = "suf"}, false)
-- returns the template-expanded equivalent of "{{l|en|O'Higgins}} region".
-- ({display_placename = "in the southern"}, false) returns "in the southern" (without wikilinking because .placetype
-- and .langcode are both nil).
local function format_holonym(place_desc, holonym_index, needs_article)
local holonym = place_desc.holonyms[holonym_index]
if holonym.no_display then
return ""
end
local orig_needs_article = needs_article
needs_article = needs_article or holonym.needs_article or holonym.force_the
local output = holonym.display_placename
local placetype = holonym.placetype
local affix_type_pt_data, affix_type, affix_is_prefix, affix, prefix, suffix, no_affix_strings
local pt_equiv_for_affix_type, already_seen_affix, need_affix
-- Implement display handlers.
local display_handler = m_placetypes.get_equiv_placetype_prop(placetype,
function(pt) return placetype_data[pt] and placetype_data[pt].display_handler end)
if display_handler then
output = display_handler(placetype, output)
end
if not holonym.suppress_affix then
-- Implement adding an affix (prefix or suffix) based on the holonym's placetype. The affix will be
-- added either if the placetype's placetype_data spec says so (by setting 'affix_type'), or if the
-- user explicitly called for this (e.g. by using 'r:suf/O'Higgins'). Before adding the affix,
-- however, we check to see if the affix is already present (e.g. the placetype is "district"
-- and the placename is "Mission District"). The placetype can override the affix to add (by setting
-- `prefix`, `suffix` or `affix`) and/or override the strings used for checking if the affix is already
-- present (by setting 'no_affix_strings', which defaults to the affix explicitly given through `prefix`,
-- `suffix` or `affix` if any are given). `prefix` and `suffix` take precedence over `affix` if both are
-- set, but only when the appropriate type of affix is requested.
-- Search through equivalent placetypes for a setting of `affix_type`, `affix`, `prefix` or `suffix`. If we
-- find any, use them. If `affix_type` is given, it is overridden by the user's explicitly specified affix
-- type. If either an `affix_type` is found or the user explicitly specified an affix type, the affix is
-- displayed according to the following:
-- 1. If `prefix`, `suffix` or `affix` is given by the placetype or equivalent placetypes, use it (e.g.
-- placetype `administrative region` requests suffix "region" but doesn't set affix type; if the user
-- explicitly specifies `administrative region` as the placetype for a holonym and specifies a suffixal
-- affix type, use "region"). In this search, we stop looking if we find an explicit `affix_type`
-- setting; if this is found without an associated affix setting, the assumption is the associated
-- placetype was intended as the affix, not some explicit affix setting associated with a fallback
-- placetype.
-- 2. Otherwise, if the user explicitly requested an affix type, use the actual placetype (principle of
-- least surprise).
-- 3. Finally, fall back to the placetype associated with an explicit `affix_type` setting (which will
-- always exist if we get this far).
affix_type_pt_data, pt_equiv_for_affix_type = m_placetypes.get_equiv_placetype_prop(placetype,
function(pt)
local cdpt = placetype_data[pt]
return cdpt and cdpt.affix_type and cdpt or nil
end
)
affix_pt_data, pt_equiv_for_affix = m_placetypes.get_equiv_placetype_prop(placetype,
function(pt)
local cdpt = placetype_data[pt]
return cdpt and (cdpt.affix_type or cdpt.affix or cdpt.prefix or cdpt.suffix) and cdpt or nil
end
)
if affix_type_pt_data then
affix_type = affix_type_pt_data.affix_type
need_affix = true
end
if affix_pt_data then
prefix = affix_pt_data.prefix or affix_pt_data.affix
suffix = affix_pt_data.suffix or affix_pt_data.affix
need_affix = true
end
no_affix_strings = affix_pt_data and affix_pt_data.no_affix_strings or
affix_type_pt_data and affix_type_pt_data.no_affix_strings
if holonym.affix_type and placetype then
affix_type = holonym.affix_type
prefix = prefix or placetype
suffix = suffix or placetype
need_affix = true
end
if need_affix then
-- At this point the affix_type has been determined and can't change any more, so we can figure out
-- whether we need the calculated prefix or suffix.
affix_is_prefix = affix_type == "pref" or affix_type == "Pref"
if affix_is_prefix then
affix = prefix
else
affix = suffix
end
if not affix then
if not pt_equiv_for_affix_type then
internal_error("Something wrong, `pt_equiv_for_affix_type` not set processing holonym: %s",
holonym)
end
affix = pt_equiv_for_affix_type.placetype
if not affix then
internal_error("Something wrong, no affix could be located in `pt_equiv_for_affix_type` for " ..
"holonym %s: %s", holonym, pt_equiv_for_affix_type)
end
end
no_affix_strings = no_affix_strings or lc(affix)
if holonym.pluralize_affix then
affix = m_placetypes.pluralize_placetype(affix)
end
already_seen_affix = m_placetypes.check_already_seen_string(output, no_affix_strings)
end
end
output = link(output, holonym.langcode or placetype and "en" or nil)
if need_affix and not affix_is_prefix and not already_seen_affix then
output = output .. " " .. (affix_type == "Suf" and ucfirst_all(affix) or affix)
end
if needs_article then
local article = holonym.force_the and "the" or get_holonym_article(output, place_desc, holonym_index)
if article then
output = article .. " " .. output
end
end
if affix_is_prefix and not already_seen_affix then
output = (affix_type == "Pref" and ucfirst_all(affix) or affix) .. " of " .. output
if orig_needs_article then
-- Put the article before the added affix if we're the first holonym in the place description. This is
-- distinct from the article added above for the holonym itself; cf. "c:pref/United States,Canada" ->
-- "the countries of the United States and Canada". We need to use the value of `needs_article` passed
-- in from the function, which indicates whether we're processing the first holonym.
output = "the " .. output
end
end
return output
end
-- Render the holonym at `holonym_index` of `place_desc` together with whatever separates it from the preceding text,
-- and return the result. `entry_placetype` is the entry's placetype (the last one if there are several, excluding
-- conjunctions and parenthetical items); it selects the preposition placed before the first holonym.
--
-- The separator is computed only when the holonym is displayed (i.e. `no_display` is not set):
-- * first holonym with a placetype: " <preposition> ", where the preposition comes from
--   `m_placetypes.get_placetype_entry_preposition`;
-- * first holonym that is raw text (no placetype, e.g. 'in southern'): a single space, unless the text itself begins
--   with a comma;
-- * later holonyms: a comma if the previous holonym has a placetype, the current text is neither "and" nor "in", and
--   `suppress_comma` is not set; then a space unless the holonym is raw text beginning with a comma.
-- The holonym itself is formatted by `format_holonym`, which is told whether this is the first holonym.
local function format_holonym_in_context(entry_placetype, place_desc, holonym_index)
	local all_holonyms = place_desc.holonyms
	local this_holonym = all_holonyms[holonym_index]
	local is_first = holonym_index == 1
	local delimiter = ""

	if not this_holonym.no_display then
		local placename = this_holonym.display_placename
		if is_first then
			if this_holonym.placetype then
				delimiter = " " .. m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " "
			elseif not placename:find("^,") then
				delimiter = " "
			end
		else
			local wants_comma = all_holonyms[holonym_index - 1].placetype and placename ~= "and" and
				placename ~= "in" and not this_holonym.suppress_comma
			if wants_comma then
				delimiter = ","
			end
			-- Raw text that already starts with a comma supplies its own punctuation, so no space goes before it.
			if this_holonym.placetype or not placename:find("^,") then
				delimiter = delimiter .. " "
			end
		end
	end

	return delimiter .. format_holonym(place_desc, holonym_index, is_first)
end
-- Return the display description of `placetype`. The placetype is split into its qualifiers and bare placetype by
-- `m_placetypes.split_qualifiers_from_placetype`; each resulting split is tried in order, and the first one whose bare
-- placetype has a display form (per `m_placetypes.get_placetype_display_form`) wins, with its qualifiers prepended.
-- If no split yields a display form, the qualifiers and bare placetype of the last split are returned as plain text.
local function get_placetype_description(placetype)
	local qualifier_prefix = ""
	for _, pieces in ipairs(m_placetypes.split_qualifiers_from_placetype(placetype)) do
		local earlier_qualifiers, latest_qualifier, bare_placetype = unpack(pieces, 1, 3)
		if not latest_qualifier then
			qualifier_prefix = ""
		elseif earlier_qualifiers then
			qualifier_prefix = earlier_qualifiers .. " " .. latest_qualifier .. " "
		else
			qualifier_prefix = latest_qualifier .. " "
		end
		local display_form = m_placetypes.get_placetype_display_form(bare_placetype)
		if display_form then
			return qualifier_prefix .. display_form
		end
		-- Remember the bare placetype so the fallback below uses the last split's value.
		placetype = bare_placetype
	end
	return qualifier_prefix .. placetype
end
-- Return the description of a qualifier (which may be multiple words). The qualifier is run through the placetype
-- splitter with a dummy placetype "foo" appended, and the qualifier portion of the last split is returned.
local function get_qualifier_description(qualifier)
	local splits = m_placetypes.split_qualifiers_from_placetype(qualifier .. " foo")
	local earlier_qualifiers, latest_qualifier = unpack(splits[#splits], 1, 2)
	if earlier_qualifiers then
		return earlier_qualifiers .. " " .. latest_qualifier
	end
	return latest_qualifier
end
-- Inline modifiers (e.g. <tr:...>) accepted on the values of the "extra info" parameters; this table is handed to
-- `parse_inline_modifiers` as `param_mods` in get_extra_info().
local term_param_mods = {
	-- <t:...> is stored under the "gloss" key of the parsed term.
	t = {
		item_dest = "gloss",
	},
	tr = {},
	ts = {},
	-- <g:...> must end up in the "genders" key of the parsed term because that is what [[Module:links]] expects; the
	-- value is split on commas.
	g = {
		item_dest = "genders",
		convert = function(arg, parse_err)
			return split(arg, ",", true)
		end,
	},
	id = {},
	alt = {},
	q = {},
	qq = {},
	-- <sc:...> is converted from a script code to a script object.
	sc = {
		convert = function(arg, parse_err)
			if not arg then
				return nil
			end
			return require("Module:scripts").getByCode(arg, parse_err) or nil
		end,
	},
}
-- Return the text for one piece of "extra info" that can follow a definition (modern name, capital, etc.), or the
-- empty string if the parameter `paramname` is absent from `args` or is an empty list.
--
-- * `args[paramname]` may be a single string or a list of strings. Each string may hold several comma-separated
--   terms, and each term may carry inline modifiers (see `term_param_mods`) and/or a language prefix; a term
--   containing a colon is parsed with `parse_term_with_lang`, and terms without a language are treated as English.
-- * `tag` is the label displayed before the linked terms. If `auto_plural` is set and there is more than one value,
--   the tag is pluralized; if `with_colon` is set, a colon is appended to it.
-- * If `ucfirst` is set, the result starts with ". " followed by the tag with its first letter capitalized;
--   otherwise it starts with "; " followed by the tag as-is.
-- * `conjunction` is the conjunction used to join the linked terms (default "and").
local function get_extra_info(args, paramname, tag, ucfirst, auto_plural, with_colon, conjunction)
	local raw_values = args[paramname]
	if not raw_values then
		return ""
	end
	local values = type(raw_values) == "table" and raw_values or {raw_values}
	if #values == 0 then
		return ""
	end

	if auto_plural and #values > 1 then
		tag = pluralize(tag)
	end
	if with_colon then
		tag = tag .. ":"
	end

	-- Convert the text of a single term into the object expected by full_link().
	local function term_to_obj(term, parse_err)
		local actual_term, termlang = term, nil
		if term:find(":") then
			actual_term, termlang = require(parse_utilities_module).parse_term_with_lang {
				term = term,
				parse_err = parse_err
			}
		end
		return {term = actual_term, lang = termlang or enlang}
	end

	local linked_values = {}
	for _, val in ipairs(values) do
		local term_objs
		-- Check for inline modifiers, e.g. מרים<tr:Miryem>. But exclude HTML with <span ...>, <i ...>, <br/> or similar
		-- in it, caused by wrapping an argument in {{l|...}}, {{af|...}} or similar. All tags of the sort we parse
		-- consist of a less-than sign, plus letters, plus a colon, e.g. <tr:...>, so if the first tag on the outer
		-- level isn't in this format, we don't try to parse it. The restriction to the outer level allows generated
		-- HTML inside of e.g. qualifier tags, such as foo<q:similar to {{m|fr|bar}}>.
		if val:find("<") and not val:find("^[^<]*<[a-z]*[^a-z:]") then
			term_objs = require(parse_utilities_module).parse_inline_modifiers(val, {
				paramname = paramname,
				param_mods = term_param_mods,
				generate_obj = term_to_obj,
				splitchar = ",",
			})
		else
			if val:find(",<") then
				-- This happens when there's an embedded {{,}} template; easiest not to try and parse the value as
				-- multiple terms.
				term_objs = {val}
			else
				term_objs = split_on_comma(val)
			end
			for i, term_text in ipairs(term_objs) do
				term_objs[i] = term_to_obj(term_text)
			end
		end
		for _, term_obj in ipairs(term_objs) do
			insert(linked_values, m_links.full_link(term_obj, nil, "allow self link", "show qualifiers"))
		end
	end

	local lead = ucfirst and (". " .. m_strutils.ucfirst(tag)) or ("; " .. tag)
	return lead .. " " .. m_table.serialCommaJoin(linked_values, {conj = conjunction or "and"})
end
-- Format an old-style place description (with separate arguments for the placetype and each holonym) for display and
-- return the resulting string.
--
-- Parameters:
-- * `args`: the template arguments; this function reads `args.also` (extra text appended after "and ") and `args.a`
-- (a user-specified article, consulted only when `desc_index` is 1).
-- * `place_desc`: the place description; its `placetypes` list and (optional) `holonyms` list are read.
-- * `desc_index`: the index of this description among those being formatted. For an index other than 1, the article
-- is suppressed unless at least one holonym has a placetype.
-- * `with_article`: whether to prepend an article to the result.
-- * `ucfirst`: passed to `m_placetypes.get_placetype_article` when the article is looked up rather than user-specified.
local function format_old_style_place_desc_for_display(args, place_desc, desc_index, with_article, ucfirst)
-- The placetype used to determine whether "in" or "of" follows is the last placetype if there are
-- multiple slash-separated placetypes, but ignoring "and", "or" and parenthesized notes
-- such as "(one of 254)".
local entry_placetype = nil
local placetypes = place_desc.placetypes
local function is_and_or(item)
return item == "and" or item == "or"
end
-- Pieces of the output, concatenated at the end.
local parts = {}
local function ins(txt)
insert(parts, txt)
end
-- Insert a space, but only if something has already been output.
local function ins_space()
if #parts > 0 then
ins(" ")
end
end
-- Locate the last "and"/"or" among the placetypes, if any.
local and_or_pos
for i, placetype in ipairs(placetypes) do
if is_and_or(placetype) then
and_or_pos = i
-- no break here; we want the last in case of more than one
end
end
-- Index of the first placetype not consumed by the conjunction handling below.
local remaining_placetype_index
if and_or_pos then
track("multiple-placetypes-with-and")
if and_or_pos == #placetypes then
error("Conjunctions 'and' and 'or' cannot occur last in a set of slash-separated placetypes: " ..
concat(placetypes, "/"))
end
-- Everything up to and including the placetype right after the last conjunction is joined into a single list,
-- using that last conjunction as the joining word.
local items = {}
for i = 1, and_or_pos + 1 do
local pt = placetypes[i]
if is_and_or(pt) then
-- skip
elseif i > 1 and pt:find("^%(") then
-- append placetypes beginning with a paren to previous item
items[#items] = items[#items] .. " " .. pt
else
entry_placetype = pt
insert(items, get_placetype_description(pt))
end
end
ins(m_table.serialCommaJoin(items, {conj = placetypes[and_or_pos]}))
remaining_placetype_index = and_or_pos + 2
else
remaining_placetype_index = 1
end
-- Output the placetypes that were not part of the conjoined list.
for i = remaining_placetype_index, #placetypes do
local pt = placetypes[i]
-- Check for and, or and placetypes beginning with a paren (so that things like
-- "{{place|en|county/(one of 254)|s/Texas}}" work).
if m_placetypes.placetype_is_ignorable(pt) then
ins_space()
ins(pt)
else
entry_placetype = pt
-- Join multiple placetypes with comma unless placetypes are already
-- joined with "and". We allow "the" to precede the second placetype
-- if they're not joined with "and" (so we get "city and county seat of ..."
-- but "city, the county seat of ...").
if i > 1 then
ins(", ")
local article = m_placetypes.get_placetype_article(pt)
if article ~= "the" and i > remaining_placetype_index then
-- Track cases where we are comma-separating multiple placetypes without the second one starting
-- with "the", as they may be mistakes. The occurrence of "the" is usually intentional, e.g.
-- {{place|zh|municipality/state capital|s/Rio de Janeiro|c/Brazil|t1=Rio de Janeiro}}
-- for the city of [[Rio de Janeiro]], which displays as "a municipality, the state capital of ...".
track("multiple-placetypes-without-and-or-the")
end
ins(article)
ins(" ")
end
ins(get_placetype_description(pt))
end
end
-- Append the user-specified `also` text, introduced by "and ".
if args.also then
ins_space()
ins("and ")
ins(args.also)
end
-- Append each holonym along with its leading delimiter/preposition.
if place_desc.holonyms then
for holonym_index, _ in ipairs(place_desc.holonyms) do
ins(format_holonym_in_context(entry_placetype, place_desc, holonym_index))
end
end
local gloss = concat(parts)
if with_article then
-- `article` stays nil unless the user supplied one for the first description; in that case it is looked up
-- from the first placetype below.
local article
if desc_index == 1 then
article = args.a
else
if not place_desc.holonyms then
-- there isn't a following holonym; the place type given might be raw text as well, so don't add
-- an article.
with_article = false
else
local saw_placetype_holonym = false
for _, holonym in ipairs(place_desc.holonyms) do
if holonym.placetype then
saw_placetype_holonym = true
break
end
end
if not saw_placetype_holonym then
-- following holonym(s) is/are just raw text; the place type given might be raw text as well,
-- so don't add an article.
with_article = false
end
end
if with_article then
track("second-or-higher-description-with-added-article")
else
track("second-or-higher-description-suppressed-article")
end
end
-- `with_article` may have been cleared above, hence the second check.
if with_article then
article = article or m_placetypes.get_placetype_article(place_desc.placetypes[1], ucfirst)
gloss = article .. " " .. gloss
end
end
return gloss
end
--[==[
Return the full gloss (English description) of a new-style place description. New-style place descriptions are
specified with a single string containing raw text interspersed with placetypes and holonyms surrounded by `<<...>>`.
The segments listed in `place_desc.order` are rendered one after another and concatenated: raw text is emitted as-is,
placetypes and qualifiers are converted to their descriptions, and holonyms (identified by their index) are formatted
with `format_holonym`. If `with_article` is set and the user supplied an article in `args.a`, that article followed by
a space is placed first. An unrecognized segment type is an internal error. Exported for use by [[Module:demonyms]].
]==]
function export.format_new_style_place_desc_for_display(args, place_desc, with_article)
	local pieces = {}
	local user_article = with_article and args.a
	if user_article then
		insert(pieces, user_article .. " ")
	end
	for _, item in ipairs(place_desc.order) do
		local kind, value = item.type, item.value
		if kind == "raw" then
			insert(pieces, value)
		elseif kind == "placetype" then
			insert(pieces, get_placetype_description(value))
		elseif kind == "qualifier" then
			insert(pieces, get_qualifier_description(value))
		elseif kind == "holonym" then
			insert(pieces, format_holonym(place_desc, value, false))
		else
			internal_error("Unrecognized segment type %s", kind)
		end
	end
	return concat(pieces)
end
-- Return a string with the gloss (the description of the place itself, as opposed to translations).
-- * If `args.def` is "-", the gloss is empty. If `args.def` is any other value, it replaces the generated gloss; a
--   value containing `<<` is parsed and formatted as a new-style place description, anything else is returned as-is.
-- * Otherwise each place description in `descs` is formatted (new-style if it has an `order` field, old-style
--   otherwise) and the results are concatenated, with each description's `joiner` (if any) placed after it.
--   `ucfirst` is passed on only to the first description, and only when it is old-style.
-- * Unless `drop_extra_info` is given, the "extra info" (modern name, capital, largest city, etc.) is appended;
--   dropping it is used when transcluding into another language using {{transclude sense}}. `ucfirst` is forwarded to
--   the extra-info items that are introduced with a colon.
local function get_display_form(args, descs, ucfirst, drop_extra_info)
	local def = args.def
	if def == "-" then
		return ""
	end
	if def then
		if not def:find("<<") then
			return def
		end
		local def_desc = export.parse_new_style_place_desc(def)
		return export.format_new_style_place_desc_for_display({}, def_desc, false)
	end

	local glosses = {}
	local article_wanted = true
	local capitalize = ucfirst
	for n, desc in ipairs(descs) do
		if desc.order then
			insert(glosses, export.format_new_style_place_desc_for_display(args, desc, n == 1))
		else
			insert(glosses, format_old_style_place_desc_for_display(args, desc, n, article_wanted, capitalize))
		end
		local joiner = desc.joiner
		if joiner then
			insert(glosses, joiner)
		end
		-- Each description decides whether the one after it gets an article.
		article_wanted = desc.include_following_article
		capitalize = false
	end

	local pieces = {concat(glosses)}
	if not drop_extra_info then
		local function add_info(paramname, tag, uc, auto_plural, with_colon, conjunction)
			insert(pieces, get_extra_info(args, paramname, tag, uc, auto_plural, with_colon, conjunction))
		end
		add_info("modern", "modern", false, false, false, "or")
		add_info("full", "in full,", false, false, false, "or")
		add_info("short", "short form", false, false, false, "or")
		add_info("abbr", "abbreviation", false, false, false, "or")
		add_info("official", "official name", ucfirst, "auto plural", "with colon")
		add_info("capital", "capital", ucfirst, "auto plural", "with colon")
		add_info("largest city", "largest city", ucfirst, "auto plural", "with colon")
		add_info("caplc", "capital and largest city", ucfirst, false, "with colon")
		-- The label for seat= depends on the first placetype of the first description.
		local seat_labels = {
			county = "county seat",
			counties = "county seat",
			parish = "parish seat",
			parishes = "parish seat",
			borough = "borough seat",
			boroughs = "borough seat",
		}
		local first_placetype = descs[1].placetypes[1]
		add_info("seat", seat_labels[first_placetype] or "seat", ucfirst, "auto plural", "with colon")
		add_info("shire town", "shire town", ucfirst, "auto plural", "with colon")
		add_info("headquarters", "headquarters", ucfirst, false, "with colon")
	end
	return concat(pieces)
end
-- Old entry point: an alias for `export.format_new_style_place_desc_for_display` under its former name, presumably
-- kept for callers that have not yet been updated. OBSOLETE ME!
export.get_new_style_gloss = export.format_new_style_place_desc_for_display
-- Return the definition line. When no translations were given in `args.t`, this is just the gloss, requested with
-- `ucfirst` set. Otherwise it is the formatted translations followed by the (non-capitalized) gloss in parentheses;
-- the parenthetical is omitted if the gloss is empty. `drop_extra_info` is passed through to get_display_form().
local function get_def(args, specs, drop_extra_info)
	if #args.t == 0 then
		return get_display_form(args, specs, true, drop_extra_info)
	end
	local gloss = get_display_form(args, specs, false, drop_extra_info)
	local parenthetical = ""
	if gloss ~= "" then
		parenthetical = " (" .. gloss .. ")"
	end
	return get_translations(args.t, args.tid) .. parenthetical
end
---------- Functions for the category wikicode
-- The code in this section finds the categories to which a given place belongs. See comment at top of file.
--[=[
Find the appropriate category specs for a given place description and placetype. For example, for the template
invocation {{tl|place|en|city/and/county|s/Pennsylvania|c/US}}, which results in the place description
```
{
placetypes = {"city", "and", "county"},
holonyms = {
{placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
{placetype = "country", display_placename = "United States", unlinked_placename = "United States"},
},
holonyms_by_placetype = {
state = {"Pennsylvania"},
country = {"United States"},
},
}
```
the call
```
find_placetype_cat_specs {
entry_placetype = "city",
place_desc = {
placetypes = {"city", "and", "county"},
holonyms = {
{placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
{placetype = "country", display_placename = "United States", unlinked_placename = "United States"},
},
holonyms_by_placetype = {
state = {"Pennsylvania"},
country = {"United States"},
},
},
}
```
might produce the return value
```
{
entry_placetype = "city",
cat_specs = {"Cities in Pennsylvania, USA"},
triggering_holonym = {placetype = "state", display_placename = "Pennsylvania", unlinked_placename = "Pennsylvania"},
triggering_holonym_index = 1,
}
```
See the comment at the top of the section for a description of category specs and the overall algorithm.
On entry, `data` is an object with the following fields:
* `entry_placetype`: the entry placetype (or equivalent) used to look up the category data in placetype_data,
which must have already been resolved to a placetype with an entry in `placetype_data`;
* `place_desc`: the full place description as documented at the top of the file (used only for its holonyms);
* `first_holonym_index`: the index of the first holonym to consider when iterating through the holonyms (used to
implement the `:also` holonym placetype modifier);
* `overriding_holonym`: an optional overriding holonym to use, in place of iterating through the holonyms (used to
implement categorizing other holonyms of the same type as the triggering holonym, so that e.g.
{{tl|place|en|river|s/Kansas,Nebraska}}, or equivalently {{tl|place|en|river|s/Kansas|and|s/Nebraska}}, works);
* `from_demonym`: we are called from {{tl|demonym-noun}} or {{tl|demonym-adj}} instead of {{tl|place}}, and should
generate categories appropriate to those templates.
The return value is {nil} if no category specs could be located, otherwise an object with the following fields:
* `entry_placetype`: the placetype that should be used to construct categories when `true` is one of the returned
category specs (normally the same as the `entry_placetype` passed in, but will be different when a "fallback" key
exists and is used);
* `cat_specs`: list of category specs as described above;
* `triggering_holonym`: the triggering holonym (see the comment at the top of the section), or nil if there was no
triggering holonym;
* `triggering_holonym_index`: the index of the triggering holonym in the list of holonyms in `place_desc`, or nil if
an overriding holonym was passed in or there was no triggering holonym.
]=]
local function find_placetype_cat_specs(data)
	local entry_placetype = data.entry_placetype
	local place_desc = data.place_desc
	local first_holonym_index = data.first_holonym_index
	local overriding_holonym = data.overriding_holonym
	local from_demonym = data.from_demonym

	-- Try to find category specs triggered by a single holonym. Returns the specs and the equivalent entry placetype
	-- they were found under, or nil if the holonym is raw text or nothing matched. Three sources are tried in order:
	-- political_division_cat_handler; a `cat_handler` attached to the entry placetype's data; and (only when
	-- `no_fallback` is not set) a wildcard key such as "country/*" in the entry placetype's data.
	local function fetch_cat_specs(holonym_to_match, index, no_fallback)
		local holonym_placetype = holonym_to_match.placetype
		if not holonym_placetype then
			-- raw text in place of holonym
			return nil
		end
		local holonym_placename = holonym_to_match.unlinked_placename
		if not holonym_placename then
			internal_error("Missing unlinked_placename in holonym (index %s): %s", index, holonym_to_match)
		end

		-- Build the argument object passed to a category handler.
		local function handler_data(equiv_entry_pt, equiv_holonym_pt)
			return {
				entry_placetype = equiv_entry_pt,
				holonym_placetype = equiv_holonym_pt,
				holonym_placename = holonym_placename,
				holonym_index = index,
				place_desc = place_desc,
				from_demonym = from_demonym,
			}
		end

		local function has_specs(specs)
			return specs and specs[1]
		end

		-- (1) The handler for political divisions, tried over all equivalents of both placetypes.
		local division_specs, division_equiv = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				return m_placetypes.get_equiv_placetype_prop(holonym_placetype,
					function(equiv_holonym_pt)
						return m_placetypes.political_division_cat_handler(
							handler_data(equiv_entry_pt, equiv_holonym_pt))
					end)
			end,
			{no_fallback = no_fallback}
		)
		if has_specs(division_specs) then
			return division_specs, division_equiv.placetype
		end

		-- (2) A category handler declared on the entry placetype (or an equivalent).
		local cat_handler, handler_equiv = m_placetypes.get_equiv_placetype_prop(entry_placetype,
			function(equiv_entry_pt)
				local pt_data = m_placetypes.placetype_data[equiv_entry_pt]
				if pt_data and pt_data.cat_handler then
					return pt_data.cat_handler
				end
			end,
			{no_fallback = no_fallback}
		)
		if cat_handler then
			local handler_specs = m_placetypes.get_equiv_placetype_prop(holonym_placetype,
				function(equiv_holonym_pt)
					return cat_handler(handler_data(handler_equiv.placetype, equiv_holonym_pt))
				end)
			if has_specs(handler_specs) then
				return handler_specs, handler_equiv.placetype
			end
		end

		-- (3) A wildcard spec keyed by the holonym placetype, e.g. "country/*".
		if not no_fallback then
			local wildcard_specs, wildcard_equiv = m_placetypes.get_equiv_placetype_prop(entry_placetype,
				function(equiv_entry_pt)
					local pt_data = m_placetypes.placetype_data[equiv_entry_pt]
					if pt_data then
						return m_placetypes.get_equiv_placetype_prop(holonym_placetype,
							function(equiv_holonym_pt)
								return pt_data[equiv_holonym_pt .. "/*"]
							end)
					end
				end
			)
			if has_specs(wildcard_specs) then
				return wildcard_specs, wildcard_equiv.placetype
			end
		end

		return nil
	end

	if overriding_holonym then
		-- FIXME, change the algorithm to eliminate overriding_holonym
		local cat_specs, fetched_entry_placetype = fetch_cat_specs(overriding_holonym, nil)
		if cat_specs and cat_specs[1] then
			return {
				entry_placetype = fetched_entry_placetype,
				cat_specs = cat_specs,
				triggering_holonym = overriding_holonym,
				-- no triggering_holonym_index
			}
		end
		return nil
	end

	-- Walk the holonyms from `first_holonym_index` (if given) onward and return the result object for the first one
	-- that yields category specs, or nil if none does.
	local function scan_holonyms(no_fallback)
		for i, holonym in ipairs(place_desc.holonyms) do
			if not (first_holonym_index and i < first_holonym_index) then
				local cat_specs, fetched_entry_placetype = fetch_cat_specs(holonym, i, no_fallback)
				if cat_specs and cat_specs[1] then
					return {
						entry_placetype = fetched_entry_placetype,
						cat_specs = cat_specs,
						triggering_holonym = holonym,
						triggering_holonym_index = i,
					}
				end
			end
		end
		return nil
	end

	-- We scan the holonyms twice, the first time setting `no_fallback` so that we process only category specs for the
	-- specifically given entry placetype (possibly with preceding qualifiers). The reason for this is to correctly
	-- handle cases like [[Poblacion IX]]:
	--   {{place|en|barangay|mun/Roxas|p/Capiz|c/Philippines}}.
	-- "barangay" falls back to "neighborhood", and without the `no_fallback` scan, the neighborhood cat handler run on
	-- the mun/Roxas holonym will take precedence over the barangay-specific setting for p/Capiz because we check, for
	-- each holonym in turn, first for a matching spec through political_division_cat_handler, then a cat handler, then
	-- a wildcard spec like country/*. During the no-fallback scan, wildcard specs are not checked because it seems a
	-- fallback matching exactly or through a cat handler on an earlier holonym would be better than a wildcard match
	-- for the exact entry placetype at a later holonym. (FIXME: But I don't know for sure; maybe we should check
	-- wildcard holonyms on the exact entry placetype first, or contrariwise maybe we should check only exact-match
	-- holonyms through political_division_cat_handler on the exact entry placetype first, not even checking other cat
	-- handlers.)
	return scan_holonyms("no_fallback") or scan_holonyms(nil)
end
-- Turn a list of category specs into the corresponding categories (minus the language code prefix) and return them
-- as a list. `place_desc` is the place description being categorized. `cat_data` is an object of the shape returned
-- by find_placetype_cat_specs(), with these fields:
-- * `cat_specs`: the category specs to convert;
-- * `entry_placetype`: the entry placetype used to build a category when a spec is `true`;
-- * `triggering_holonym`: the holonym object that triggered the specs, or nil if there is none;
-- * `triggering_holonym_index`: the index of that holonym in `place_desc`, if known.
--
-- With a triggering holonym, a spec of `true` becomes "<Pluralized placetype> <preposition> +++", and "+++" in any
-- spec is replaced by the prefixed key of the known location matching the holonym; if the holonym matches no known
-- location, the category is dropped (and the failure logged and tracked). Without a triggering holonym, a spec of
-- `true` becomes just the pluralized placetype, and a spec containing "+++" is an internal error.
local function cat_specs_to_categories(place_desc, cat_data)
	local cat_specs = cat_data.cat_specs
	local entry_placetype = cat_data.entry_placetype
	local triggering_holonym = cat_data.triggering_holonym
	local triggering_holonym_index = cat_data.triggering_holonym_index
	local categories = {}

	for _, cat_spec in ipairs(cat_specs) do
		local cat = cat_spec
		if triggering_holonym then
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst") .. " " ..
					m_placetypes.get_placetype_entry_preposition(entry_placetype) .. " +++"
			end
			if not cat:find("%+%+%+") then
				insert(categories, cat)
			else
				local group, key, spec = m_placetypes.find_matching_holonym_location {
					holonym_placetype = triggering_holonym.placetype,
					holonym_placename = triggering_holonym.unlinked_placename,
					holonym_index = triggering_holonym_index,
					place_desc = place_desc,
				}
				if group then
					local filled_in = cat:gsub("%+%+%+",
						m_strutils.replacement_escape(m_placetypes.get_prefixed_key(key, spec)))
					insert(categories, filled_in)
				else
					mw.log(("Unable to insert category for cat spec '%s' because holonym '%s/%s' did not match a " ..
						"known location"):format(cat, triggering_holonym.placetype, triggering_holonym.unlinked_placename))
					track("cant-match-holonym-for-category-spec")
				end
			end
		else
			if cat_spec == true then
				cat = m_placetypes.pluralize_placetype(entry_placetype, "ucfirst")
			elseif cat:find("%+%+%+") then
				internal_error("Category %s contains +++ but there is no holonym to substitute", cat)
			end
			insert(categories, cat)
		end
	end

	return categories
end
-- Return the categories (without initial lang code) that should be added to the entry, given the place description
-- (which specifies the entry placetype(s) and holonym(s)) and a particular entry placetype (e.g. "city"). Only the
-- holonyms of the place description are looked at, not its entry placetypes. `from_demonym` is passed through to
-- find_placetype_cat_specs(). If no holonym produces a category, the `default` category specs of the entry placetype
-- (or an equivalent) are used if present; otherwise an empty list is returned.
local function get_placetype_cats(place_desc, entry_placetype, from_demonym)
	local cats = {}

	-- Also generate categories for other holonyms of the same placetype as the triggering holonym, so that e.g.
	-- {{place|en|city|s/Kansas|and|s/Missouri|c/USA}} generates both [[:Category:en:Cities in Kansas, USA]] and
	-- [[:Category:en:Cities in Missouri, USA]]. We loop over non-fallback equivalent placetypes to the triggering
	-- holonym's placetype, in case it is non-canonical (e.g. `cities/San Francisco`). This matches the loop over
	-- equivalent places in key_holonym_into_place_desc().
	local function add_cats_for_sibling_holonyms(triggering_holonym)
		local equivs = m_placetypes.get_placetype_equivs(triggering_holonym.placetype, {no_fallback = true})
		for _, equiv in ipairs(equivs) do
			local sibling_placenames = place_desc.holonyms_by_placetype[equiv.placetype]
			if sibling_placenames then
				for _, sibling_placename in ipairs(sibling_placenames) do
					if sibling_placename ~= triggering_holonym.unlinked_placename then
						local sibling_cat_data = find_placetype_cat_specs {
							entry_placetype = entry_placetype,
							place_desc = place_desc,
							overriding_holonym = {
								placetype = triggering_holonym.placetype,
								unlinked_placename = sibling_placename,
							},
							from_demonym = from_demonym,
						}
						if sibling_cat_data then
							extend(cats, cat_specs_to_categories(place_desc, sibling_cat_data))
						end
					end
				end
			end
		end
	end

	local start_index = 1
	while start_index <= #place_desc.holonyms do
		-- Find the category specs corresponding to the holonym(s) in the place description.
		local cat_data = find_placetype_cat_specs {
			entry_placetype = entry_placetype,
			place_desc = place_desc,
			first_holonym_index = start_index,
			from_demonym = from_demonym,
		}
		if not cat_data then
			-- No category spec could be found.
			break
		end
		local triggering_holonym = cat_data.triggering_holonym
		if not triggering_holonym then
			internal_error("find_placetype_cat_specs should have returned a triggering holonym: %s", cat_data)
		end
		-- Generate categories for the category specs found, then for same-type holonyms.
		extend(cats, cat_specs_to_categories(place_desc, cat_data))
		add_cats_for_sibling_holonyms(triggering_holonym)
		-- If there are any later-specified holonyms that had the modifier :also, try to produce categories for them
		-- as well: resume at the first holonym after the triggering one that has `continue_cat_loop` set.
		start_index = cat_data.triggering_holonym_index + 1
		while start_index <= #place_desc.holonyms and not place_desc.holonyms[start_index].continue_cat_loop do
			start_index = start_index + 1
		end
	end

	if cats[1] then
		return cats
	end

	-- Nothing was triggered by a holonym; fall back to the placetype's default category specs.
	local entry_pt_default, default_equiv = m_placetypes.get_equiv_placetype_prop(entry_placetype, function(pt)
		local pt_data = m_placetypes.placetype_data[pt]
		return pt_data and pt_data.default
	end)
	if entry_pt_default then
		return cat_specs_to_categories(place_desc, {
			cat_specs = entry_pt_default,
			entry_placetype = default_equiv.placetype,
			-- no triggering holonym
		})
	end
	return {}
end
--[==[
Iterate through each type of place given in `place_descriptions` (a list of place descriptions) and return a list of
the categories that need to be added to the entry. The returned categories need to be prefixed with the langcode to get
the actual Wiktionary categories, and passed to `format_categories` in [[Module:utilities]] to format the categories
into strings. `args` is the table of user-specified arguments, used to add "bare categories" corresponding to toponyms
for known locations and to add any categories given explicitly in `args.cat` (each value of which may hold several
comma-separated categories). `from_demonym` is true if we're being called from {{tl|demonym-noun}} or
{{tl|demonym-adj}}. In this case, bare categories and per-placetype categories are skipped, and only the categories
produced through the special placetype "*" (plus any explicit ones) are returned.
]==]
function export.get_cats(args, place_descriptions, from_demonym)
	local cats = {}
	local for_place = not from_demonym

	handle_category_implications(place_descriptions, m_placetypes.cat_implications)
	m_placetypes.augment_holonyms_with_container(place_descriptions)

	if for_place then
		local bare_categories = m_placetypes.get_bare_categories(args, place_descriptions)
		extend(cats, bare_categories)
	end

	for _, place_desc in ipairs(place_descriptions) do
		if for_place then
			for _, placetype in ipairs(place_desc.placetypes) do
				local ignorable = m_placetypes.placetype_is_ignorable(placetype)
				if not ignorable then
					extend(cats, get_placetype_cats(place_desc, placetype))
				end
			end
		end
		-- Also add base categories for the holonyms listed (e.g. a category like
		-- [[Category:Places in Merseyside, England]]). This is handled through the special placetype "*".
		extend(cats, get_placetype_cats(place_desc, "*", from_demonym))
	end

	local explicit_cats = args.cat
	if explicit_cats then -- not necessarily present when called from [[Module:demonym]]
		for _, cat in ipairs(explicit_cats) do
			local split_cats = split_on_comma(cat)
			extend(cats, split_cats)
		end
	end

	return cats
end
-- Return the formatted category text for a list of categories. `lang` is the language object, whose full code plus a
-- colon is prefixed to each category name in `cats`; `sort_key` is passed through to `format_categories` in
-- [[Module:utilities]]. Categories are forced if either the module-level `force_cat` flag or
-- `m_placetypes.get_force_cat()` says so.
local function format_cats(lang, cats, sort_key)
	local prefix = lang:getFullCode() .. ":"
	local full_cats = {}
	for _, cat in ipairs(cats) do
		full_cats[#full_cats + 1] = prefix .. cat
	end
	return require(utilities_module).format_categories(full_cats, lang, sort_key, nil,
		force_cat or m_placetypes.get_force_cat())
end
----------- Main entry point
--[==[
Implementation of {{tl|place}}. Meant to be callable from another module (specifically, [[Module:transclude]]).
`template_args` is the raw table of template arguments, which is validated and processed here. `drop_extra_info` means
to not include "extra info" (modern name, capital, largest city, etc.); this is used when transcluding into another
language using {{tl|tcl}}. The return value is the definition line followed by the formatted categories (the latter
omitted if {{para|nocat}} is given).
]==]
function export.format(template_args, drop_extra_info)
	-- FIXME, once we've flushed out any uses, delete the following clause. That will cause def= to be ignored.
	if template_args.def == "" then
		error("Cannot currently pass def= as an empty parameter; use def=- if you want to suppress the definition display")
	end

	local list_param = {list = true}
	local params = {
		[1] = {required = true, type = "language", default = "und"},
		[2] = {required = true, list = true},
		t = list_param,
		tid = {list = true, allow_holes = true},
		cat = list_param,
		nocat = {type = "boolean"},
		sort = true,
		pagename = true, -- for testing or documentation purposes
		a = true,
		also = true,
		def = true,

		-- params that are only used when transcluding using {{tcl}}/{{transclude}}, to transmit information to
		-- {{tcl}}.
		tcl = true,
		tcl_t = list_param,
		tcl_tid = list_param,
		tcl_nolb = true,
		tcl_noextratext = {type = "boolean"},

		-- "extra info" that can be included
		modern = list_param,
		full = list_param,
		short = list_param,
		abbr = list_param,
		official = list_param,
		capital = list_param,
		["largest city"] = list_param,
		caplc = true,
		seat = list_param,
		["shire town"] = list_param,
		headquarters = list_param,
	}

	local args = require("Module:parameters").process(template_args, params)
	local place_descriptions = parse_place_descriptions(args[2])
	local definition = get_def(args, place_descriptions, drop_extra_info)
	local categories = ""
	if not args.nocat then
		categories = format_cats(args[1], export.get_cats(args, place_descriptions), args.sort)
	end
	return definition .. categories
end
--[==[
Actual entry point of {{tl|place}}. Fetches the arguments of the parent frame (i.e. those passed to the template)
and hands them to `export.format`.
]==]
function export.show(frame)
	local parent_args = frame:getParent().args
	return export.format(parent_args)
end
return export