Parsers available to Apache Tika

CompositeParser

Class: org.apache.tika.parser.CompositeParser

Composite Parser

DefaultParser

Class: org.apache.tika.parser.DefaultParser

Composite Parser

AppleSingleFileParser

Class: org.apache.tika.parser.apple.AppleSingleFileParser

Mime Types:

  • application/applefile

PListParser

Class: org.apache.tika.parser.apple.PListParser

Mime Types:

  • application/x-plist
  • application/x-bplist-itunes
  • application/x-bplist
  • application/x-bplist-memgraph
  • application/x-bplist-webarchive

ClassParser

Class: org.apache.tika.parser.asm.ClassParser

Mime Types:

  • application/java-vm

AudioParser

Class: org.apache.tika.parser.audio.AudioParser

Mime Types:

  • audio/vnd.wave
  • audio/x-wav
  • audio/basic
  • audio/x-aiff

MidiParser

Class: org.apache.tika.parser.audio.MidiParser

Mime Types:

  • application/x-midi
  • audio/midi

ChmParser

Class: org.apache.tika.parser.chm.ChmParser

Mime Types:

  • application/vnd.ms-htmlhelp
  • application/x-chm
  • application/chm

SourceCodeParser

Class: org.apache.tika.parser.code.SourceCodeParser

Mime Types:

  • text/x-c++src
  • text/x-groovy
  • text/x-java-source

Pkcs7Parser

Class: org.apache.tika.parser.crypto.Pkcs7Parser

Mime Types:

  • application/pkcs7-signature
  • application/pkcs7-mime

TSDParser

Class: org.apache.tika.parser.crypto.TSDParser

Mime Types:

  • application/timestamped-data

TextAndCSVParser

Class: org.apache.tika.parser.csv.TextAndCSVParser

Mime Types:

  • text/csv
  • text/tsv
  • text/plain

DBFParser

Class: org.apache.tika.parser.dbf.DBFParser

Mime Types:

  • application/x-dbf

DIFParser

Class: org.apache.tika.parser.dif.DIFParser

Mime Types:

  • application/dif+xml

DWGParser

Class: org.apache.tika.parser.dwg.DWGParser

Mime Types:

  • image/vnd.dwg

EpubParser

Class: org.apache.tika.parser.epub.EpubParser

Mime Types:

  • application/x-ibooks+zip
  • application/epub+zip

ExecutableParser

Class: org.apache.tika.parser.executable.ExecutableParser

Mime Types:

  • application/x-msdownload
  • application/x-sharedlib
  • application/x-elf
  • application/x-object
  • application/x-executable
  • application/x-coredump

FeedParser

Class: org.apache.tika.parser.feed.FeedParser

Mime Types:

  • application/atom+xml
  • application/rss+xml

AdobeFontMetricParser

Class: org.apache.tika.parser.font.AdobeFontMetricParser

Mime Types:

  • application/x-font-adobe-metric

TrueTypeParser

Class: org.apache.tika.parser.font.TrueTypeParser

Mime Types:

  • application/x-font-ttf

GDALParser

Class: org.apache.tika.parser.gdal.GDALParser

Mime Types:

  • application/x-gsc
  • image/x-ozi
  • application/x-pds
  • image/eir
  • application/x-usgs-dem
  • application/aaigrid
  • application/x-bag
  • application/elas
  • application/x-rs2
  • application/x-tsx
  • application/x-lcp
  • image/geotiff
  • application/x-mbtiles
  • application/x-cappi
  • application/x-netcdf
  • application/x-gsag
  • application/x-epsilon
  • application/x-ace2
  • application/jaxa-pal-sar
  • image/x-pcraster
  • application/x-msgn
  • image/arg
  • application/x-hdf
  • image/x-mff
  • application/x-kro
  • image/x-hdf5-image
  • image/x-dimap
  • image/x-srp
  • image/big-gif
  • application/x-envi
  • application/x-cosar
  • application/x-ntv2
  • image/bmp
  • application/x-doq2
  • application/x-bt
  • application/x-kml
  • application/x-gmt
  • application/x-rst
  • application/vrt
  • application/pcisdk
  • application/x-ctg
  • application/x-e00-grid
  • application/x-rik
  • image/ida
  • image/x-mff2
  • application/sdts-raster
  • application/x-snodas
  • image/jp2
  • image/sar-ceos
  • application/terragen
  • application/x-wcs
  • application/leveller
  • application/x-ingr
  • application/x-gtx
  • image/sgi
  • application/x-pnm
  • image/raster
  • application/fits
  • application/x-r
  • image/gif
  • application/x-envi-hdr
  • application/x-http
  • application/x-rmf
  • application/x-ecrg-toc
  • application/aig
  • application/x-rpf-toc
  • image/adrg
  • application/x-srtmhgt
  • application/x-generic-bin
  • application/jdem
  • image/x-airsar
  • application/x-webp
  • application/x-ngs-geoid
  • application/x-pcidsk
  • image/x-fujibas
  • application/x-wms
  • application/x-map
  • image/ceos
  • application/xpm
  • application/x-zmap
  • image/envisat
  • application/x-ers
  • application/x-doq1
  • application/x-isis2
  • application/x-nwt-grd
  • application/x-ppi
  • image/ilwis
  • application/x-isis3
  • application/x-nwt-grc
  • application/x-blx
  • application/gff
  • application/x-ndf
  • image/jpeg
  • application/x-geo-pdf
  • application/x-l1b
  • image/fit
  • application/x-gsbg
  • application/x-sdat
  • application/x-ctable2
  • application/x-grib
  • application/x-coasp
  • application/x-dipex
  • application/grass-ascii-grid
  • image/fits
  • application/x-til
  • application/x-dods
  • image/png
  • application/x-gxf
  • application/x-gs7bg
  • application/x-cpg
  • application/x-lan
  • application/x-xyz
  • image/bsb
  • application/x-p-aux
  • application/dted
  • application/x-rasterlite
  • image/nitf
  • image/hfa
  • application/x-fast
  • application/x-los-las

GeoParser

Class: org.apache.tika.parser.geo.topic.GeoParser

Mime Types:

  • application/geotopic

GeographicInformationParser

Class: org.apache.tika.parser.geoinfo.GeographicInformationParser

Mime Types:

  • text/iso19139+xml

GribParser

Class: org.apache.tika.parser.grib.GribParser

Mime Types:

  • application/x-grib2

HDFParser

Class: org.apache.tika.parser.hdf.HDFParser

Mime Types:

  • application/x-hdf

HwpV5Parser

Class: org.apache.tika.parser.hwp.HwpV5Parser

Mime Types:

  • application/x-hwp-v5

BPGParser

Class: org.apache.tika.parser.image.BPGParser

Mime Types:

  • image/bpg
  • image/x-bpg

HeifParser

Class: org.apache.tika.parser.image.HeifParser

Mime Types:

  • image/heic-sequence
  • image/heif
  • image/heic
  • image/heif-sequence

ICNSParser

Class: org.apache.tika.parser.image.ICNSParser

Mime Types:

  • image/icns

ImageParser

Class: org.apache.tika.parser.image.ImageParser

Mime Types:

  • image/png
  • image/vnd.wap.wbmp
  • image/x-jbig2
  • image/bmp
  • image/x-xcf
  • image/gif
  • image/x-icon
  • image/x-ms-bmp

PSDParser

Class: org.apache.tika.parser.image.PSDParser

Mime Types:

  • image/vnd.adobe.photoshop

TiffParser

Class: org.apache.tika.parser.image.TiffParser

Mime Types:

  • image/tiff

WebPParser

Class: org.apache.tika.parser.image.WebPParser

Mime Types:

  • image/webp

IDMLParser

Class: org.apache.tika.parser.indesign.IDMLParser

Mime Types:

  • application/vnd.adobe.indesign-idml-package

IptcAnpaParser

Class: org.apache.tika.parser.iptc.IptcAnpaParser

Mime Types:

  • text/vnd.iptc.anpa

ISArchiveParser

Class: org.apache.tika.parser.isatab.ISArchiveParser

Mime Types:

  • application/x-isatab

IWorkPackageParser

Class: org.apache.tika.parser.iwork.IWorkPackageParser

Mime Types:

  • application/vnd.apple.keynote
  • application/vnd.apple.iwork
  • application/vnd.apple.numbers
  • application/vnd.apple.pages

JpegParser

Class: org.apache.tika.parser.jpeg.JpegParser

Mime Types:

  • image/jpeg

RFC822Parser

Class: org.apache.tika.parser.mail.RFC822Parser

Mime Types:

  • message/rfc822

MatParser

Class: org.apache.tika.parser.mat.MatParser

Mime Types:

  • application/x-matlab-data

MboxParser

Class: org.apache.tika.parser.mbox.MboxParser

Mime Types:

  • application/mbox

OutlookPSTParser

Class: org.apache.tika.parser.mbox.OutlookPSTParser

Mime Types:

  • application/vnd.ms-outlook-pst

EMFParser

Class: org.apache.tika.parser.microsoft.EMFParser

Mime Types:

  • image/emf

JackcessParser

Class: org.apache.tika.parser.microsoft.JackcessParser

Mime Types:

  • application/x-msaccess

MSOwnerFileParser

Class: org.apache.tika.parser.microsoft.MSOwnerFileParser

Mime Types:

  • application/x-ms-owner

OfficeParser

Class: org.apache.tika.parser.microsoft.OfficeParser

Mime Types:

  • application/x-tika-msoffice-embedded; format=ole10_native
  • application/msword
  • application/vnd.visio
  • application/vnd.ms-project
  • application/x-tika-msworks-spreadsheet
  • application/x-mspublisher
  • application/vnd.ms-powerpoint
  • application/x-tika-msoffice
  • application/sldworks
  • application/x-tika-ooxml-protected
  • application/vnd.ms-excel
  • application/vnd.ms-outlook

OldExcelParser

Class: org.apache.tika.parser.microsoft.OldExcelParser

Mime Types:

  • application/vnd.ms-excel.workspace.3
  • application/vnd.ms-excel.workspace.4
  • application/vnd.ms-excel.sheet.2
  • application/vnd.ms-excel.sheet.3
  • application/vnd.ms-excel.sheet.4

TNEFParser

Class: org.apache.tika.parser.microsoft.TNEFParser

Mime Types:

  • application/vnd.ms-tnef
  • application/x-tnef
  • application/ms-tnef

WMFParser

Class: org.apache.tika.parser.microsoft.WMFParser

Mime Types:

  • image/wmf

OneNoteParser

Class: org.apache.tika.parser.microsoft.onenote.OneNoteParser

Mime Types:

  • application/onenote; format=one

OOXMLParser

Class: org.apache.tika.parser.microsoft.ooxml.OOXMLParser

Mime Types:

  • application/vnd.ms-powerpoint.template.macroenabled.12
  • application/vnd.ms-excel.addin.macroenabled.12
  • application/vnd.openxmlformats-officedocument.wordprocessingml.template
  • application/vnd.ms-excel.sheet.binary.macroenabled.12
  • application/vnd.openxmlformats-officedocument.wordprocessingml.document
  • application/vnd.ms-powerpoint.slide.macroenabled.12
  • application/vnd.ms-visio.drawing
  • application/vnd.ms-powerpoint.slideshow.macroenabled.12
  • application/vnd.ms-powerpoint.presentation.macroenabled.12
  • application/vnd.openxmlformats-officedocument.presentationml.slide
  • application/vnd.ms-excel.sheet.macroenabled.12
  • application/vnd.ms-word.template.macroenabled.12
  • application/vnd.ms-word.document.macroenabled.12
  • application/vnd.ms-powerpoint.addin.macroenabled.12
  • application/vnd.openxmlformats-officedocument.spreadsheetml.template
  • application/vnd.ms-xpsdocument
  • application/vnd.ms-visio.drawing.macroenabled.12
  • application/vnd.ms-visio.template.macroenabled.12
  • model/vnd.dwfx+xps
  • application/vnd.openxmlformats-officedocument.presentationml.template
  • application/vnd.openxmlformats-officedocument.presentationml.presentation
  • application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  • application/vnd.ms-visio.stencil
  • application/vnd.ms-visio.template
  • application/vnd.openxmlformats-officedocument.presentationml.slideshow
  • application/vnd.ms-visio.stencil.macroenabled.12
  • application/vnd.ms-excel.template.macroenabled.12

Word2006MLParser

Class: org.apache.tika.parser.microsoft.ooxml.xwpf.ml2006.Word2006MLParser

Mime Types:

  • application/vnd.ms-word2006ml

SpreadsheetMLParser

Class: org.apache.tika.parser.microsoft.xml.SpreadsheetMLParser

Mime Types:

  • application/vnd.ms-spreadsheetml

WordMLParser

Class: org.apache.tika.parser.microsoft.xml.WordMLParser

Mime Types:

  • application/vnd.ms-wordml

MIFParser

Class: org.apache.tika.parser.mif.MIFParser

Mime Types:

  • application/x-mif
  • application/vnd.mif
  • application/x-maker

Mp3Parser

Class: org.apache.tika.parser.mp3.Mp3Parser

Mime Types:

  • audio/mpeg

NoakesMP4Parser

Class: org.apache.tika.parser.mp4.NoakesMP4Parser

Mime Types:

  • video/x-m4v
  • application/mp4
  • video/3gpp
  • video/3gpp2
  • video/quicktime
  • audio/mp4
  • video/mp4

NetCDFParser

Class: org.apache.tika.parser.netcdf.NetCDFParser

Mime Types:

  • application/x-netcdf

FlatOpenDocumentParser

Class: org.apache.tika.parser.odf.FlatOpenDocumentParser

Mime Types:

  • application/vnd.oasis.opendocument.tika.flat.document
  • application/vnd.oasis.opendocument.flat.presentation
  • application/vnd.oasis.opendocument.flat.spreadsheet
  • application/vnd.oasis.opendocument.flat.text

OpenDocumentParser

Class: org.apache.tika.parser.odf.OpenDocumentParser

Mime Types:

  • application/x-vnd.oasis.opendocument.presentation
  • application/vnd.oasis.opendocument.chart
  • application/x-vnd.oasis.opendocument.text-web
  • application/x-vnd.oasis.opendocument.image
  • application/vnd.oasis.opendocument.graphics-template
  • application/vnd.oasis.opendocument.text-web
  • application/x-vnd.oasis.opendocument.spreadsheet-template
  • application/vnd.oasis.opendocument.spreadsheet-template
  • application/vnd.sun.xml.writer
  • application/x-vnd.oasis.opendocument.graphics-template
  • application/vnd.oasis.opendocument.graphics
  • application/vnd.oasis.opendocument.spreadsheet
  • application/x-vnd.oasis.opendocument.chart
  • application/x-vnd.oasis.opendocument.spreadsheet
  • application/vnd.oasis.opendocument.image
  • application/x-vnd.oasis.opendocument.text
  • application/x-vnd.oasis.opendocument.text-template
  • application/vnd.oasis.opendocument.formula-template
  • application/x-vnd.oasis.opendocument.formula
  • application/vnd.oasis.opendocument.image-template
  • application/x-vnd.oasis.opendocument.image-template
  • application/x-vnd.oasis.opendocument.presentation-template
  • application/vnd.oasis.opendocument.presentation-template
  • application/vnd.oasis.opendocument.text
  • application/vnd.oasis.opendocument.text-template
  • application/vnd.oasis.opendocument.chart-template
  • application/x-vnd.oasis.opendocument.chart-template
  • application/x-vnd.oasis.opendocument.formula-template
  • application/x-vnd.oasis.opendocument.text-master
  • application/vnd.oasis.opendocument.presentation
  • application/x-vnd.oasis.opendocument.graphics
  • application/vnd.oasis.opendocument.formula
  • application/vnd.oasis.opendocument.text-master

PDFParser

Class: org.apache.tika.parser.pdf.PDFParser

Mime Types:

  • application/pdf

CompressorParser

Class: org.apache.tika.parser.pkg.CompressorParser

Mime Types:

  • application/zlib
  • application/x-gzip
  • application/x-bzip2
  • application/x-compress
  • application/x-java-pack200
  • application/x-lzma
  • application/deflate64
  • application/x-lz4
  • application/x-snappy
  • application/x-brotli
  • application/gzip
  • application/x-bzip
  • application/x-xz

PackageParser

Class: org.apache.tika.parser.pkg.PackageParser

Mime Types:

  • application/x-tar
  • application/java-archive
  • application/x-arj
  • application/x-archive
  • application/zip
  • application/x-cpio
  • application/x-tika-unix-dump
  • application/x-7z-compressed

RarParser

Class: org.apache.tika.parser.pkg.RarParser

Mime Types:

  • application/x-rar-compressed

RTFParser

Class: org.apache.tika.parser.rtf.RTFParser

Mime Types:

  • application/rtf

SAS7BDATParser

Class: org.apache.tika.parser.sas.SAS7BDATParser

Mime Types:

  • application/x-sas-data

FLVParser

Class: org.apache.tika.parser.video.FLVParser

Mime Types:

  • video/x-flv

QuattroProParser

Class: org.apache.tika.parser.wordperfect.QuattroProParser

Mime Types:

  • application/x-quattro-pro; version=9

WordPerfectParser

Class: org.apache.tika.parser.wordperfect.WordPerfectParser

Mime Types:

  • application/vnd.wordperfect; version=5.1
  • application/vnd.wordperfect; version=5.0
  • application/vnd.wordperfect; version=6.x

XLIFF12Parser

Class: org.apache.tika.parser.xliff.XLIFF12Parser

Mime Types:

  • application/x-xliff+xml

XLZParser

Class: org.apache.tika.parser.xliff.XLZParser

Mime Types:

  • application/x-xliff+zip

DcXMLParser

Class: org.apache.tika.parser.xml.DcXMLParser

Mime Types:

  • application/xml
  • image/svg+xml

FictionBookParser

Class: org.apache.tika.parser.xml.FictionBookParser

Mime Types:

  • application/x-fictionbook+xml

FlacParser

Class: org.gagravarr.tika.FlacParser

Mime Types:

  • audio/x-oggflac
  • audio/x-flac

OggParser

Class: org.gagravarr.tika.OggParser

Mime Types:

  • audio/ogg
  • application/kate
  • application/ogg
  • video/daala
  • video/x-ogguvs
  • video/x-ogm
  • audio/x-oggpcm
  • video/ogg
  • video/x-dirac
  • video/x-oggrgb
  • video/x-oggyuv

OpusParser

Class: org.gagravarr.tika.OpusParser

Mime Types:

  • audio/opus
  • audio/ogg; codecs=opus

SpeexParser

Class: org.gagravarr.tika.SpeexParser

Mime Types:

  • audio/ogg; codecs=speex
  • audio/speex

TheoraParser

Class: org.gagravarr.tika.TheoraParser

Mime Types:

  • video/theora

VorbisParser

Class: org.gagravarr.tika.VorbisParser

Mime Types:

  • audio/vorbis

HtmlParser

Class: org.hes.tika.parser.html.HtmlParser

Mime Types:

  • text/html
  • application/vnd.wap.xhtml+xml
  • application/x-asp
  • application/xhtml+xml

EmptyParser

Class: org.apache.tika.parser.EmptyParser

Decorated Parser - With Types

Mime Types: (none listed)

TXTParser

Class: org.apache.tika.parser.txt.TXTParser

Decorated Parser - With Types

Mime Types: (none listed)

RFC822Parser

Class: org.apache.tika.parser.mail.RFC822Parser

Decorated Parser - With Types

Mime Types: (none listed)

HtmlParser

Class: org.hes.tika.parser.html.HtmlParser

Decorated Parser - With Types

Mime Types: