<OAI-PMH xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.openarchives.org/OAI/2.0/" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/          http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <responseDate>2026-04-23T22:52:19.234Z</responseDate>
  <request verb="GetRecord">https://www.nb.no/sprakbanken/oai</request>
  <GetRecord>
    <record>
      <header>
        <identifier>oai:nb.no:sbr-66</identifier>
        <datestamp/>
      </header>
      <metadata>
        <cmd:CMD xmlns:cmd="http://www.clarin.eu/cmd/1" xmlns="http://www.clarin.eu/cmd/" xmlns:cmdp="http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925" CMDVersion="1.2" xsi:schemaLocation="http://www.clarin.eu/cmd/1 https://infra.clarin.eu/CMDI/1.x/xsd/cmd-envelop.xsd http://www.clarin.eu/cmd/1/profiles/clarin.eu:cr1:p_1407745711925 https://catalog.clarin.eu/ds/ComponentRegistry/rest/registry/1.1/profiles/clarin.eu:cr1:p_1407745711925/1.2/xsd">
          <cmd:Header>
            <cmd:MdCreator>Arne Martinus Lindstad</cmd:MdCreator>
            <cmd:MdCreationDate>2021-09-23</cmd:MdCreationDate>
            <cmd:MdSelfLink>https://www.nb.no/sprakbanken/oai?verb=GetRecord&amp;identifier=oai:nb.no:sbr-66&amp;metadataPrefix=cmdi</cmd:MdSelfLink>
            <cmd:MdProfile>clarin.eu:cr1:p_1407745711925</cmd:MdProfile>
            <cmd:MdCollectionDisplayName>Språkbanken NB</cmd:MdCollectionDisplayName>
          </cmd:Header>
          <cmd:Resources>
            <cmd:ResourceProxyList>
              <cmd:ResourceProxy id="wikidisc">
                <cmd:ResourceType mimetype="application/zip">Resource</cmd:ResourceType>
                <cmd:ResourceRef>https://www.nb.no/sbfil/tekst/2019_wikidisc.zip</cmd:ResourceRef>
              </cmd:ResourceProxy>
              <cmd:ResourceProxy id="wikidisc_doc">
                <cmd:ResourceType mimetype="application/pdf">Resource</cmd:ResourceType>
                <cmd:ResourceRef>https://www.nb.no/sbfil/dok/2019_wikidisc.pdf</cmd:ResourceRef>
              </cmd:ResourceProxy>
            </cmd:ResourceProxyList>
            <cmd:JournalFileProxyList/>
            <cmd:ResourceRelationList>
              <cmd:ResourceRelation>
                <cmd:RelationType>describes</cmd:RelationType>
                <cmd:Resource>
                  <cmd:Role>
                    <cmd:Resource>
                      <cmd:Role/>
                    </cmd:Resource>
                  </cmd:Role>
                </cmd:Resource>
              </cmd:ResourceRelation>
            </cmd:ResourceRelationList>
          </cmd:Resources>
          <cmd:IsPartOfList/>
          <cmd:Components>
            <cmdp:corpusProfile>
              <cmdp:resourceCommonInfo>
                <cmdp:resourceType>corpus</cmdp:resourceType>
                <cmdp:identificationInfo>
                  <cmdp:resourceName xml:lang="nn">Diskusjonstekster frå Wikipedia</cmdp:resourceName>
                  <cmdp:resourceName xml:lang="en">Discussions from Wikipedia</cmdp:resourceName>
                  <cmdp:description xml:lang="nn">Dette korpuset inneheld ein dump av diskusjonstrådar frå Wikipedia, der forfattarar diskuterer ulike problemstillingar i samband med publisering av bestemde artiklar på Wikipedia.

Artiklane er fordelte på to filer, ei for høvesvis bokmål (nb.wikipedia.json) og nynorsk (nn.wikipedia.json). Kvar diskusjon er eit element i eit json-array, med eitt nivå som inneheld tekst og diverse metadata. Det er åtte datafelt per diskusjon:

- title: tittel på artikkelen som vert diskutert
- pageid: identifikator for artikkelen
- revid: revisjonsinformasjon
- wikidata: ev. andre data
- contentcategories: metadata
- hiddencategories: metadata
- text: diskusjonstekst
- bytelength: lengde på teksten i bytes

Eit døme på dette finst i dokumentasjonsfila (2019_wikidisc.pdf).</cmdp:description>
                  <cmdp:description xml:lang="en">This corpus is a dump of discussion threads from the Norwegian Wikipedia, where authors discuss various issues regarding the publication of specific Wikipedia articles.

The material is split into two files, one each for Norwegian Bokmål (nb.wikipedia.json) and Nynorsk (nn.wikipedia.json). Each file is a structured JSON array. One discussion corresponds to one element, with one level containing text and metadata. There are eight key/value pairs per discussion:

- title: title of article under discussion
- pageid: identifier of the article
- revid: revision information
- wikidata: other data, if any
- contentcategories: metadata
- hiddencategories: metadata
- text: discussion text
- bytelength: length of text in number of bytes

An example of this can be found in the documentation file (2019_wikidisc.pdf).</cmdp:description>
                  <cmdp:url cmd:description="resource homepage">https://www.nb.no/sprakbanken/ressurskatalog/oai-nb-no-sbr-66/</cmdp:url>
                  <cmdp:PID cmd:description="hdl">hdl:21.11146/66</cmdp:PID>
                  <cmdp:identifier>sbr-66</cmdp:identifier>
                </cmdp:identificationInfo>
                <cmdp:distributionInfo>
                  <cmdp:licenceInfo>
                    <cmdp:userCategory>Public</cmdp:userCategory>
                    <cmdp:distributionAccessMedium>downloadable</cmdp:distributionAccessMedium>
                    <cmdp:downloadLocation cmd:description="resource homepage">https://www.nb.no/sprakbanken/ressurskatalog/oai-nb-no-sbr-66/</cmdp:downloadLocation>
                    <cmdp:licence>
                      <cmdp:licenceFamily>Creative Commons (CC)</cmdp:licenceFamily>
                      <cmdp:licenceName>Creative_Commons-BY-SA (CC-BY-SA)</cmdp:licenceName>
                      <cmdp:licenceURL>https://creativecommons.org/licenses/by-sa/4.0/</cmdp:licenceURL>
                      <cmdp:conditionsOfUse>BY</cmdp:conditionsOfUse>
                      <cmdp:conditionsOfUse>SA</cmdp:conditionsOfUse>
                    </cmdp:licence>
                    <cmdp:licensor>
                      <cmdp:actorInfo>
                        <cmdp:actorType>organization</cmdp:actorType>
                        <cmdp:role xml:lang="en">Licensor</cmdp:role>
                        <cmdp:organizationInfo>
                          <cmdp:organizationName xml:lang="nn">Wikimedia Norge</cmdp:organizationName>
                          <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                        </cmdp:organizationInfo>
                      </cmdp:actorInfo>
                    </cmdp:licensor>
                  </cmdp:licenceInfo>
                  <cmdp:iprHolder>
                    <cmdp:actorInfo>
                      <cmdp:actorType>organization</cmdp:actorType>
                      <cmdp:role xml:lang="en">IPR Holder</cmdp:role>
                      <cmdp:organizationInfo>
                        <cmdp:organizationName xml:lang="nn">Wikimedia Norge</cmdp:organizationName>
                        <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                      </cmdp:organizationInfo>
                    </cmdp:actorInfo>
                  </cmdp:iprHolder>
                </cmdp:distributionInfo>
                <cmdp:contact>
                  <cmdp:actorInfo>
                    <cmdp:actorType>organization</cmdp:actorType>
                    <cmdp:role xml:lang="en">Contact</cmdp:role>
                    <cmdp:organizationInfo>
                      <cmdp:organizationName xml:lang="en">National Library of Norway</cmdp:organizationName>
                      <cmdp:organizationName xml:lang="nn">Nasjonalbiblioteket</cmdp:organizationName>
                      <cmdp:organizationShortName xml:lang="en">NLN</cmdp:organizationShortName>
                      <cmdp:organizationShortName xml:lang="nn">NB</cmdp:organizationShortName>
                      <cmdp:departmentName xml:lang="en">The Language Bank</cmdp:departmentName>
                      <cmdp:departmentName xml:lang="nn">Språkbanken</cmdp:departmentName>
                    </cmdp:organizationInfo>
                    <cmdp:communicationInfo>
                      <cmdp:email>sprakbanken@nb.no</cmdp:email>
                      <cmdp:url>https://www.nb.no/sprakbanken/</cmdp:url>
                      <cmdp:address>P.O. Box 2674 Solli</cmdp:address>
                      <cmdp:zipCode>0203</cmdp:zipCode>
                      <cmdp:city>Oslo</cmdp:city>
                      <cmdp:region>Oslo</cmdp:region>
                      <cmdp:country>Norway</cmdp:country>
                    </cmdp:communicationInfo>
                  </cmdp:actorInfo>
                </cmdp:contact>
                <cmdp:metadataInfo>
                  <cmdp:metadataCreationDate>2021-09-23</cmdp:metadataCreationDate>
                  <cmdp:metadataLanguageName>English</cmdp:metadataLanguageName>
                  <cmdp:metadataLanguageId>en</cmdp:metadataLanguageId>
                  <cmdp:metadataLastDateUpdated>2023-08-07</cmdp:metadataLastDateUpdated>
                  <cmdp:metadataCreator>
                    <cmdp:actorInfo>
                      <cmdp:actorType>person</cmdp:actorType>
                      <cmdp:role xml:lang="en">Metadata Creator</cmdp:role>
                      <cmdp:personInfo>
                        <cmdp:surname xml:lang="nn">Lindstad</cmdp:surname>
                        <cmdp:givenName xml:lang="nn">Arne Martinus</cmdp:givenName>
                        <cmdp:affiliation>
                          <cmdp:organizationInfo>
                            <cmdp:organizationName xml:lang="en">National Library of Norway</cmdp:organizationName>
                            <cmdp:organizationName xml:lang="nn">Nasjonalbiblioteket</cmdp:organizationName>
                            <cmdp:organizationShortName xml:lang="en">NLN</cmdp:organizationShortName>
                            <cmdp:organizationShortName xml:lang="nn">NB</cmdp:organizationShortName>
                            <cmdp:departmentName xml:lang="en">The Language Bank</cmdp:departmentName>
                            <cmdp:departmentName xml:lang="nn">Språkbanken</cmdp:departmentName>
                          </cmdp:organizationInfo>
                        </cmdp:affiliation>
                      </cmdp:personInfo>
                      <cmdp:communicationInfo>
                        <cmdp:email>sprakbanken@nb.no</cmdp:email>
                        <cmdp:url>https://www.nb.no/sprakbanken/</cmdp:url>
                        <cmdp:address>P.O. Box 2674 Solli</cmdp:address>
                        <cmdp:zipCode>0203</cmdp:zipCode>
                        <cmdp:city>Oslo</cmdp:city>
                        <cmdp:region>Oslo</cmdp:region>
                        <cmdp:country>Norway</cmdp:country>
                      </cmdp:communicationInfo>
                    </cmdp:actorInfo>
                  </cmdp:metadataCreator>
                </cmdp:metadataInfo>
                <cmdp:versionInfo>
                  <cmdp:version>2019</cmdp:version>
                  <cmdp:lastDateUpdated>2019-12-11</cmdp:lastDateUpdated>
                </cmdp:versionInfo>
                <cmdp:validationInfo>
                  <cmdp:validated>false</cmdp:validated>
                </cmdp:validationInfo>
                <cmdp:resourceCreationInfo>
                  <cmdp:creationEndDate>2019-12-11</cmdp:creationEndDate>
                  <cmdp:resourceCreator>
                    <cmdp:actorInfo>
                      <cmdp:actorType>organization</cmdp:actorType>
                      <cmdp:role xml:lang="en">Resource Creator</cmdp:role>
                      <cmdp:organizationInfo>
                        <cmdp:organizationName xml:lang="nn">Wikimedia Norge</cmdp:organizationName>
                        <cmdp:organizationName xml:lang="en">Wikimedia Norge</cmdp:organizationName>
                      </cmdp:organizationInfo>
                    </cmdp:actorInfo>
                  </cmdp:resourceCreator>
                </cmdp:resourceCreationInfo>
              </cmdp:resourceCommonInfo>
              <cmdp:corpusInfo>
                <cmdp:corpusType>Written Corpus</cmdp:corpusType>
                <cmdp:corpusPartInfo>
                  <cmdp:mediaType>text</cmdp:mediaType>
                  <cmdp:corpusTextInfo>
                    <cmdp:textFormatInfo>
                      <cmdp:mimeType>application/json</cmdp:mimeType>
                      <cmdp:sizePerTextFormat>
                        <cmdp:sizeInfo>
                          <cmdp:size>2</cmdp:size>
                          <cmdp:sizeUnit>files</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                        <cmdp:sizeInfo>
                          <cmdp:size>36864</cmdp:size>
                          <cmdp:sizeUnit>entries</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                        <cmdp:sizeInfo>
                          <cmdp:size>136,7</cmdp:size>
                          <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                        <cmdp:sizeInfo>
                          <cmdp:size>18400000</cmdp:size>
                          <cmdp:sizeUnit>words</cmdp:sizeUnit>
                        </cmdp:sizeInfo>
                      </cmdp:sizePerTextFormat>
                    </cmdp:textFormatInfo>
                    <cmdp:characterEncodingInfo>
                      <cmdp:characterEncoding>UTF-8</cmdp:characterEncoding>
                    </cmdp:characterEncodingInfo>
                  </cmdp:corpusTextInfo>
                </cmdp:corpusPartInfo>
                <cmdp:corpusPartGeneralInfo>
                  <cmdp:lingualityInfo>
                    <cmdp:lingualityType>multilingual</cmdp:lingualityType>
                    <cmdp:multilingualityType>other</cmdp:multilingualityType>
                    <cmdp:multilingualityTypeDetails>Discussions of a similar kind in either Norwegian Bokmål or Norwegian Nynorsk</cmdp:multilingualityTypeDetails>
                  </cmdp:lingualityInfo>
                  <cmdp:languageInfo>
                    <cmdp:languageId>nb</cmdp:languageId>
                    <cmdp:languageName>Norwegian Bokmål</cmdp:languageName>
                    <cmdp:sizePerLanguage>
                      <cmdp:sizeInfo>
                        <cmdp:size>17000000</cmdp:size>
                        <cmdp:sizeUnit>words</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>31364</cmdp:size>
                        <cmdp:sizeUnit>entries</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1</cmdp:size>
                        <cmdp:sizeUnit>files</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>126,4</cmdp:size>
                        <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                    </cmdp:sizePerLanguage>
                    <cmdp:languageVarietyInfo>
                      <cmdp:languageVarietyType>jargon</cmdp:languageVarietyType>
                      <cmdp:languageVarietyName>Informal written language</cmdp:languageVarietyName>
                    </cmdp:languageVarietyInfo>
                  </cmdp:languageInfo>
                  <cmdp:languageInfo>
                    <cmdp:languageId>nn</cmdp:languageId>
                    <cmdp:languageName>Norwegian Nynorsk</cmdp:languageName>
                    <cmdp:sizePerLanguage>
                      <cmdp:sizeInfo>
                        <cmdp:size>1400000</cmdp:size>
                        <cmdp:sizeUnit>words</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>5500</cmdp:size>
                        <cmdp:sizeUnit>entries</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>1</cmdp:size>
                        <cmdp:sizeUnit>files</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                      <cmdp:sizeInfo>
                        <cmdp:size>10,3</cmdp:size>
                        <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                      </cmdp:sizeInfo>
                    </cmdp:sizePerLanguage>
                    <cmdp:languageVarietyInfo>
                      <cmdp:languageVarietyType>jargon</cmdp:languageVarietyType>
                      <cmdp:languageVarietyName>Informal written language</cmdp:languageVarietyName>
                    </cmdp:languageVarietyInfo>
                  </cmdp:languageInfo>
                  <cmdp:modalityInfo>
                    <cmdp:modalityType>writtenLanguage</cmdp:modalityType>
                  </cmdp:modalityInfo>
                  <cmdp:sizeInfo>
                    <cmdp:size>18400000</cmdp:size>
                    <cmdp:sizeUnit>words</cmdp:sizeUnit>
                  </cmdp:sizeInfo>
                  <cmdp:sizeInfo>
                    <cmdp:size>36864</cmdp:size>
                    <cmdp:sizeUnit>entries</cmdp:sizeUnit>
                  </cmdp:sizeInfo>
                  <cmdp:sizeInfo>
                    <cmdp:size>2</cmdp:size>
                    <cmdp:sizeUnit>files</cmdp:sizeUnit>
                  </cmdp:sizeInfo>
                  <cmdp:sizeInfo>
                    <cmdp:size>136,7</cmdp:size>
                    <cmdp:sizeUnit>mb</cmdp:sizeUnit>
                  </cmdp:sizeInfo>
                </cmdp:corpusPartGeneralInfo>
              </cmdp:corpusInfo>
            </cmdp:corpusProfile>
          </cmd:Components>
        </cmd:CMD>
      </metadata>
    </record>
  </GetRecord>
</OAI-PMH>