<OAI-PMH xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.openarchives.org/OAI/2.0/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/          http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <responseDate>2026-04-04T15:33:31.004Z</responseDate>
  <request verb="GetRecord">https://www.nb.no/sprakbanken/oai</request>
  <GetRecord>
    <record>
      <header>
        <identifier>oai:nb.no:sbr-50</identifier>
        <datestamp>2023-08-07</datestamp>
      </header>
      <metadata>
        <cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns="http://www.clarin.eu/cmd/" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.1/profiles/clarin.eu:cr1:p_1407745711925/1.2/xsd">
          <cmd:Header>
            <cmd:MdCreator>nb:Nasjonalbiblioteket¦nn:Nasjonalbiblioteket¦en:National Library of Norway</cmd:MdCreator>
            <cmd:MdCreationDate>2021-06-24</cmd:MdCreationDate>
            <cmd:MdSelfLink>https://www.nb.no/sprakbanken/oai?verb=GetRecord&amp;identifier=oai:nb.no:sbr-50&amp;metadataPrefix=cmdi</cmd:MdSelfLink>
            <cmd:MdProfile>clarin.eu:cr1:p_1407745711925</cmd:MdProfile>
            <cmd:MdCollectionDisplayName>Språkbanken NB</cmd:MdCollectionDisplayName>
          </cmd:Header>
          <cmd:Resources>
            <cmd:ResourceProxyList>
              <cmd:ResourceProxy id="wped_data">
                <cmd:ResourceType mimetype="application/x-gtar">Resource</cmd:ResourceType>
                <cmd:ResourceRef>https://www.nb.no/sbfil/tekst/2019_wikipedia.tar.gz</cmd:ResourceRef>
              </cmd:ResourceProxy>
              <cmd:ResourceProxy id="wped_doc">
                <cmd:ResourceType mimetype="application/pdf">Resource</cmd:ResourceType>
                <cmd:ResourceRef>https://www.nb.no/sbfil/dok/2019_wikipedia.pdf</cmd:ResourceRef>
              </cmd:ResourceProxy>
            </cmd:ResourceProxyList>
            <cmd:JournalFileProxyList/>
            <cmd:ResourceRelationList>
              <cmd:ResourceRelation>
                <cmd:RelationType>describes</cmd:RelationType>
                <cmd:Resource ref="wped_doc"/>
                <cmd:Resource ref="wped_data"/>
              </cmd:ResourceRelation>
            </cmd:ResourceRelationList>
          </cmd:Resources>
          <cmd:IsPartOfList/>
          <cmd:Components>
            <cmdp:corpusProfile>
              <cmdp:resourceCommonInfo>
                <cmdp:resourceType>corpus</cmdp:resourceType>
                <cmdp:identificationInfo>
                  <cmdp:resourceName xml:lang="en">Texts from Norwegian Wikipedia</cmdp:resourceName>
                  <cmdp:resourceName xml:lang="nb">Tekster fra norsk Wikipedia</cmdp:resourceName>
                  <cmdp:description xml:lang="en">This corpus is a dump from approximately March 20 2019 of all Wikipedia articles written in Norwegian Bokmål, Norwegian Nynorsk and Northern Sami. The corpus contains 492,864 articles for Norwegian Bokmål, 139,927 articles for Norwegian Nynorsk and 7,626 articles for Northern Sami. The files are structured as a JSON Array of all the articles as they appear on the web. Each article is a structured element, with one level of "key:value" pairs containing text and metadata. There are eight such key:value pairs per article:

- bytelength: length of text in number of bytes
- pageid: text identifier
- title: title as in Wikipedia
- hiddencategories: metadata
- text: text as in Wikipedia
- revid: revision information
- contentcategories: metadata
- wikidata: other data

An example of the JSON format can be found in the documentation file.</cmdp:description>
                  <cmdp:description xml:lang="nb">Dette korpuset inneholder en dump av samtlige Wikipediaartikler på bokmål, nynorsk og nordsamisk fra ca. 20. mars 2019. Korpuset inneholder 492.864 artikler for bokmål, 139.927 artikler for nynorsk og 7.626 artikler for nordsamisk. Korpuset er strukturert som et JSON-array over artiklene slik de foreligger på nettet. Hver artikkel er et strukturert element, med ett nivå av "nøkkel:verdi", som inneholder tekst og metadata. Det er åtte slike nøkkel:verdi-par i artiklene:

- bytelength: lengde på teksten i bytes
- pageid: identifikator for teksten
- title: tittel som i Wikipedia
- hiddencategories: metadata
- text: teksten som i Wikipedia
- revid: revisjonsinformasjon
- contentcategories: metadata
- wikidata: andre data

Et eksempel på JSON-formatet finnes i dokumentasjonsfilen.</cmdp:description>
                  <cmdp:url cmd:description="resource homepage">https://www.nb.no/sprakbanken/ressurskatalog/oai-nb-no-sbr-50/</cmdp:url>
                  <cmdp:PID cmd:description="hdl">hdl:21.11146/50</cmdp:PID>
                  <cmdp:identifier>sbr-50</cmdp:identifier>
                </cmdp:identificationInfo>
                <cmdp:distributionInfo>
                  <cmdp:licenceInfo>
                    <cmdp:userCategory>Restricted</cmdp:userCategory>
                    <cmdp:distributionAccessMedium>downloadable</cmdp:distributionAccessMedium>
                    <cmdp:downloadLocation cmd:description="resource homepage">https://www.nb.no/sprakbanken/ressurskatalog/oai-nb-no-sbr-50/</cmdp:downloadLocation>
                    <cmdp:licence>
                      <cmdp:licenceFamily>Creative Commons (CC)</cmdp:licenceFamily>
                      <cmdp:licenceName>Creative_Commons-BY-SA (CC-BY-SA)</cmdp:licenceName>
                      <cmdp:licenceURL>https://creativecommons.org/licenses/by-sa/3.0/</cmdp:licenceURL>
                      <cmdp:conditionsOfUse>BY</cmdp:conditionsOfUse>
                      <cmdp:conditionsOfUse>SA</cmdp:conditionsOfUse>
                    </cmdp:licence>
                    <cmdp:licensor>
                      <cmdp:actorInfo>
                        <cmdp:actorType>organization</cmdp:actorType>
                        <cmdp:role xml:lang="en">Licensor</cmdp:role>
                        <cmdp:organizationInfo>
                          <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                          <cmdp:organizationName xml:lang="nb">Wikimedia Norge</cmdp:organizationName>
                        </cmdp:organizationInfo>
                      </cmdp:actorInfo>
                    </cmdp:licensor>
                    <cmdp:distributionRightsHolder>
                      <cmdp:actorInfo>
                        <cmdp:actorType>organization</cmdp:actorType>
                        <cmdp:role xml:lang="en">Distribution Rights Holder</cmdp:role>
                        <cmdp:organizationInfo>
                          <cmdp:organizationName xml:lang="en">National Library of Norway</cmdp:organizationName>
                          <cmdp:organizationName xml:lang="nb">Nasjonalbiblioteket</cmdp:organizationName>
                          <cmdp:organizationShortName xml:lang="en">NLN</cmdp:organizationShortName>
                          <cmdp:organizationShortName xml:lang="nb">NB</cmdp:organizationShortName>
                          <cmdp:departmentName xml:lang="en">The Language Bank</cmdp:departmentName>
                          <cmdp:departmentName xml:lang="nb">Språkbanken</cmdp:departmentName>
                        </cmdp:organizationInfo>
                        <cmdp:communicationInfo>
                          <cmdp:email>sprakbanken@nb.no</cmdp:email>
                          <cmdp:url>https://www.nb.no/sprakbanken/</cmdp:url>
                          <cmdp:address>P.O. Box 2674 Solli</cmdp:address>
                          <cmdp:zipCode>0203</cmdp:zipCode>
                          <cmdp:city>Oslo</cmdp:city>
                          <cmdp:region>Oslo</cmdp:region>
                          <cmdp:country>Norway</cmdp:country>
                        </cmdp:communicationInfo>
                      </cmdp:actorInfo>
                    </cmdp:distributionRightsHolder>
                  </cmdp:licenceInfo>
                  <cmdp:iprHolder>
                    <cmdp:actorInfo>
                      <cmdp:actorType>organization</cmdp:actorType>
                      <cmdp:role xml:lang="en">IPR Holder</cmdp:role>
                      <cmdp:organizationInfo>
                        <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                        <cmdp:organizationName xml:lang="nb">Wikimedia Norge</cmdp:organizationName>
                      </cmdp:organizationInfo>
                    </cmdp:actorInfo>
                  </cmdp:iprHolder>
                </cmdp:distributionInfo>
                <cmdp:contact>
                  <cmdp:actorInfo>
                    <cmdp:actorType>organization</cmdp:actorType>
                    <cmdp:role xml:lang="en">Contact</cmdp:role>
                    <cmdp:organizationInfo>
                      <cmdp:organizationName xml:lang="en">National Library of Norway</cmdp:organizationName>
                      <cmdp:organizationName xml:lang="nb">Nasjonalbiblioteket</cmdp:organizationName>
                      <cmdp:organizationShortName xml:lang="en">NLN</cmdp:organizationShortName>
                      <cmdp:organizationShortName xml:lang="nb">NB</cmdp:organizationShortName>
                      <cmdp:departmentName xml:lang="en">The Language Bank</cmdp:departmentName>
                      <cmdp:departmentName xml:lang="nb">Språkbanken</cmdp:departmentName>
                    </cmdp:organizationInfo>
                    <cmdp:communicationInfo>
                      <cmdp:email>sprakbanken@nb.no</cmdp:email>
                      <cmdp:url>https://www.nb.no/sprakbanken/</cmdp:url>
                      <cmdp:address>P.O. Box 2674 Solli</cmdp:address>
                      <cmdp:zipCode>0203</cmdp:zipCode>
                      <cmdp:city>Oslo</cmdp:city>
                      <cmdp:region>Oslo</cmdp:region>
                      <cmdp:country>Norway</cmdp:country>
                    </cmdp:communicationInfo>
                  </cmdp:actorInfo>
                </cmdp:contact>
                <cmdp:metadataInfo>
                  <cmdp:metadataCreationDate>2019-06-18</cmdp:metadataCreationDate>
                  <cmdp:metadataLanguageName>English</cmdp:metadataLanguageName>
                  <cmdp:metadataLanguageId>en</cmdp:metadataLanguageId>
                  <cmdp:metadataLastDateUpdated>2023-08-07</cmdp:metadataLastDateUpdated>
                  <cmdp:metadataCreator>
                    <cmdp:actorInfo>
                      <cmdp:actorType>person</cmdp:actorType>
                      <cmdp:role xml:lang="en">Metadata Creator</cmdp:role>
                      <cmdp:personInfo>
                        <cmdp:surname xml:lang="nb">Lindstad</cmdp:surname>
                        <cmdp:givenName xml:lang="nb">Arne Martinus</cmdp:givenName>
                        <cmdp:affiliation>
                          <cmdp:organizationInfo>
                            <cmdp:organizationName xml:lang="en">National Library of Norway</cmdp:organizationName>
                            <cmdp:organizationName xml:lang="nb">Nasjonalbiblioteket</cmdp:organizationName>
                            <cmdp:organizationShortName xml:lang="en">NLN</cmdp:organizationShortName>
                            <cmdp:organizationShortName xml:lang="nb">NB</cmdp:organizationShortName>
                            <cmdp:departmentName xml:lang="en">The Language Bank</cmdp:departmentName>
                            <cmdp:departmentName xml:lang="nb">Språkbanken</cmdp:departmentName>
                          </cmdp:organizationInfo>
                        </cmdp:affiliation>
                      </cmdp:personInfo>
                      <cmdp:communicationInfo>
                        <cmdp:email>sprakbanken@nb.no</cmdp:email>
                        <cmdp:url>https://www.nb.no/sprakbanken/</cmdp:url>
                        <cmdp:address>P.O. Box 2674 Solli</cmdp:address>
                        <cmdp:zipCode>0203</cmdp:zipCode>
                        <cmdp:city>Oslo</cmdp:city>
                        <cmdp:region>Oslo</cmdp:region>
                        <cmdp:country>Norway</cmdp:country>
                      </cmdp:communicationInfo>
                    </cmdp:actorInfo>
                  </cmdp:metadataCreator>
                </cmdp:metadataInfo>
                <cmdp:versionInfo>
                  <cmdp:version>2019</cmdp:version>
                  <cmdp:lastDateUpdated>2019-03-22</cmdp:lastDateUpdated>
                </cmdp:versionInfo>
                <cmdp:validationInfo>
                  <cmdp:validated>false</cmdp:validated>
                </cmdp:validationInfo>
                <cmdp:resourceDocumentationInfo>
                  <cmdp:documentationUnstructured>
                    <cmdp:role>documentation</cmdp:role>
                    <cmdp:documentUnstructured>https://www.nb.no/sbfil/dok/2019_wikipedia.pdf</cmdp:documentUnstructured>
                  </cmdp:documentationUnstructured>
                </cmdp:resourceDocumentationInfo>
                <cmdp:resourceCreationInfo>
                  <cmdp:creationStartDate>2007-06-23</cmdp:creationStartDate>
                  <cmdp:creationEndDate>2019-03-22</cmdp:creationEndDate>
                  <cmdp:resourceCreator>
                    <cmdp:actorInfo>
                      <cmdp:actorType>organization</cmdp:actorType>
                      <cmdp:role xml:lang="en">Resource Creator</cmdp:role>
                      <cmdp:organizationInfo>
                        <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                        <cmdp:organizationName xml:lang="nb">Wikimedia Norge</cmdp:organizationName>
                      </cmdp:organizationInfo>
                    </cmdp:actorInfo>
                  </cmdp:resourceCreator>
                </cmdp:resourceCreationInfo>
              </cmdp:resourceCommonInfo>
              <cmdp:corpusInfo>
                <cmdp:corpusType>Written Corpus</cmdp:corpusType>
                <cmdp:corpusPartInfo>
                  <cmdp:mediaType>text</cmdp:mediaType>
                  <cmdp:corpusTextInfo>
                    <cmdp:textFormatInfo>
                      <cmdp:mimeType>application/json</cmdp:mimeType>
                      <cmdp:sizePerTextFormat>
                        <cmdp:sizeInfo>
                          <cmdp:size>640417</cmdp:size>
                          <cmdp:sizeUnit>articles</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                        <cmdp:sizeInfo>
                          <cmdp:size>3</cmdp:size>
                          <cmdp:sizeUnit>files</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                      </cmdp:sizePerTextFormat>
                    </cmdp:textFormatInfo>
                    <cmdp:characterEncodingInfo>
                      <cmdp:characterEncoding>UTF-8</cmdp:characterEncoding>
                    </cmdp:characterEncodingInfo>
                  </cmdp:corpusTextInfo>
                </cmdp:corpusPartInfo>
                <cmdp:corpusPartGeneralInfo>
                  <cmdp:lingualityInfo>
                    <cmdp:lingualityType>multilingual</cmdp:lingualityType>
                  </cmdp:lingualityInfo>
                  <cmdp:languageInfo>
                    <cmdp:languageId>nb</cmdp:languageId>
                    <cmdp:languageName>Norwegian Bokmål</cmdp:languageName>
                    <cmdp:sizePerLanguage>
                      <cmdp:sizeInfo>
                        <cmdp:size>492864</cmdp:size>
                        <cmdp:sizeUnit>articles</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1</cmdp:size>
                        <cmdp:sizeUnit>files</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1.3</cmdp:size>
                        <cmdp:sizeUnit>gb</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                    </cmdp:sizePerLanguage>
                  </cmdp:languageInfo>
                  <cmdp:languageInfo>
                    <cmdp:languageId>nn</cmdp:languageId>
                    <cmdp:languageName>Norwegian Nynorsk</cmdp:languageName>
                    <cmdp:sizePerLanguage>
                      <cmdp:sizeInfo>
                        <cmdp:size>139927</cmdp:size>
                        <cmdp:sizeUnit>articles</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1</cmdp:size>
                        <cmdp:sizeUnit>files</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>300</cmdp:size>
                        <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                    </cmdp:sizePerLanguage>
                  </cmdp:languageInfo>
                  <cmdp:languageInfo>
                    <cmdp:languageId>se</cmdp:languageId>
                    <cmdp:languageName>Northern Sami</cmdp:languageName>
                    <cmdp:sizePerLanguage>
                      <cmdp:sizeInfo>
                        <cmdp:size>7626</cmdp:size>
                        <cmdp:sizeUnit>articles</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1</cmdp:size>
                        <cmdp:sizeUnit>files</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>10</cmdp:size>
                        <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                    </cmdp:sizePerLanguage>
                  </cmdp:languageInfo>
                  <cmdp:modalityInfo>
                    <cmdp:modalityType>writtenLanguage</cmdp:modalityType>
                  </cmdp:modalityInfo>
                  <cmdp:timeCoverageInfo>
                    <cmdp:timeCoverage>2007-2019</cmdp:timeCoverage>
                  </cmdp:timeCoverageInfo>
                </cmdp:corpusPartGeneralInfo>
              </cmdp:corpusInfo>
            </cmdp:corpusProfile>
          </cmd:Components>
        </cmd:CMD>
      </metadata>
    </record>
  </GetRecord>
</OAI-PMH>