using System;
using System.Collections.Generic;
using System.IO.Compression;
using System.Text;
using System.Xml;
using System.IO;
using API.Services;
using Microsoft.Extensions.Logging;
namespace API.Helpers;
#nullable enable
/**
 * Contributed by https://github.com/microtherion
 *
 * All references to the "PDF Spec" (section numbers, etc.) refer to the
 * PDF 1.7 Specification a.k.a. PDF32000-1:2008
 * https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf
 */
/**
 * Reference for PDF Metadata Format
    %PDF-1.4                   ← Header
    Object 1 0 obj             ← Objects containing content
    << /Type /Catalog ... >>
    endobj
    Object 2 0 obj
    << /Type /Info ... >>
    endobj
    ...more objects...
    xref                       ← Cross-reference table
    0 6
    0000000000 65535 f
    0000000015 00000 n         ← Object 1 is at byte offset 15
    0000000109 00000 n         ← Object 2 is at byte offset 109
    ...
    trailer                    ← Trailer dictionary
    << /Size 6 /Root 1 0 R /Info 2 0 R >>
    startxref
    1234                       ← Byte offset where xref starts
    %%EOF
 */
/// 
/// Parse PDF file and try to extract as much metadata as possible.
/// Supports both text based XRef tables and compressed XRef streams (Deflate only).
/// Supports both UTF-16 and PDFDocEncoding for strings.
/// Lacks support for many PDF configurations that are theoretically possible, but should handle most common cases.
/// 
public class PdfMetadataExtractorException : Exception
{
    public PdfMetadataExtractorException()
    {
    }
    public PdfMetadataExtractorException(string message)
        : base(message)
    {
    }
    public PdfMetadataExtractorException(string message, Exception inner)
        : base(message, inner)
    {
    }
}
public interface IPdfMetadataExtractor : IDisposable
{
    Dictionary GetMetadata();
}
internal class PdfStringBuilder
{
    private readonly StringBuilder _builder = new();
    private bool _secondByte;
    private byte _prevByte;
    private bool _isUnicode;
    // PDFDocEncoding defined in PDF Spec D.1
    private readonly char[] _pdfDocMappingLow =
    [
        '\u02D8', '\u02C7', '\u02C6', '\u02D9', '\u02DD', '\u02DB', '\u02DA', '\u02DC'
    ];
    private readonly char[] _pdfDocMappingHigh =
    [
        '\u2022', '\u2020', '\u2021', '\u2026', '\u2014', '\u2013', '\u0192', '\u2044',
        '\u2039', '\u203A', '\u2212', '\u2030', '\u201E', '\u201C', '\u201D', '\u2018',
        '\u2019', '\u201A', '\u2122', '\uFB01', '\uFB02', '\u0141', '\u0152', '\u0160',
        '\u0178', '\u017D', '\u0131', '\u0142', '\u0153', '\u0161', '\u017E', ' ',
        '\u20AC'
    ];
    private void AppendPdfDocByte(byte b)
    {
        if (b is >= 0x18 and < 0x20)
        {
            _builder.Append(_pdfDocMappingLow[b - 0x18]);
        }
        else if (b is >= 0x80 and < 0xA1)
        {
            _builder.Append(_pdfDocMappingHigh[b - 0x80]);
        }
        else
        {
            _builder.Append((char)b);
        }
    }
    public void Append(char c)
    {
        _builder.Append(c);
    }
    public void AppendByte(byte b)
    {
        // PDF Spec 7.9.2.1: Strings are either UTF-16BE or PDFDocEncoded
        if (_builder.Length == 0 && !_isUnicode)
        {
            switch (_secondByte)
            {
                // Unicode strings are prefixed by a big endian BOM \uFEFF
                case true when b == 0xFF:
                    _isUnicode = true;
                    _secondByte = false;
                    break;
                case true:
                    AppendPdfDocByte(_prevByte);
                    AppendPdfDocByte(b);
                    break;
                case false when b == 0xFE:
                    _secondByte = true;
                    _prevByte = b;
                    break;
                default:
                    AppendPdfDocByte(b);
                    break;
            }
        }
        else if (_isUnicode)
        {
            if (_secondByte)
            {
                _builder.Append((char)(((char)_prevByte) << 8 | (char)b));
                _secondByte = false;
            }
            else
            {
                _prevByte = b;
                _secondByte = true;
            }
        }
        else
        {
            AppendPdfDocByte(b);
        }
    }
    public override string ToString()
    {
        if (_builder.Length == 0 && _secondByte)
        {
            AppendPdfDocByte(_prevByte);
        }
        return _builder.ToString();
    }
}
internal class PdfLexer(Stream stream)
{
    private const int BufferSize = 1024;
    private readonly byte[] _buffer = new byte[BufferSize];
    private int _pos;
    private int _valid;
    public enum TokenType
    {
        None,
        Bool,
        Int,
        Double,
        Name,
        String,
        ArrayStart,
        ArrayEnd,
        DictionaryStart,
        DictionaryEnd,
        StreamStart,
        StreamEnd,
        ObjectStart,
        ObjectEnd,
        ObjectRef,
        Keyword,
        Newline,
    }
    public struct Token(TokenType type, object value)
    {
        public TokenType Type = type;
        public object Value = value;
    }
    public Token NextToken(bool reportNewlines = false)
    {
        while (true)
        {
            switch ((char)NextByte())
            {
                case '\n' when reportNewlines:
                    return new Token(TokenType.Newline, true);
                case '\r' when reportNewlines:
                    if (NextByte() != '\n')
                    {
                        PutBack();
                    }
                    return new Token(TokenType.Newline, true);
                case ' ':
                case '\x00':
                case '\t':
                case '\n':
                case '\f':
                case '\r':
                    continue; // Skip whitespace
                case '%':
                    SkipComment();
                    continue;
                case '+':
                case '-':
                case '.':
                case >= '0' and <= '9':
                    return ScanNumber();
                case '/':
                    return ScanName();
                case '(':
                    return ScanString();
                case '[':
                    return new Token(TokenType.ArrayStart, true);
                case ']':
                    return new Token(TokenType.ArrayEnd, true);
                case '<':
                    if (NextByte() == '<')
                    {
                        return new Token(TokenType.DictionaryStart, true);
                    }
                    else
                    {
                        PutBack();
                        return ScanHexString();
                    }
                case '>':
                    ExpectByte((byte)'>');
                    return new Token(TokenType.DictionaryEnd, true);
                case >= 'a' and <= 'z':
                case >= 'A' and <= 'Z':
                    return ScanKeyword();
                default:
                    throw new PdfMetadataExtractorException("Unexpected byte, got {LastByte()}");
            }
        }
    }
    public void ResetBuffer()
    {
        _pos = 0;
        _valid = 0;
    }
    public bool TestByte(byte expected)
    {
        var result = NextByte() == expected;
        PutBack();
        return result;
    }
    public void ExpectNewline()
    {
        while (true)
        {
            var b = NextByte();
            switch ((char)b)
            {
                case ' ':
                case '\t':
                case '\f':
                    continue; // Skip whitespace
                case '\n':
                    return;
                case '\r':
                    if (NextByte() != '\n')
                    {
                        PutBack();
                    }
                    return;
                default:
                    throw new PdfMetadataExtractorException("Unexpected character, expected newline, got {b}");
            }
        }
    }
    public long GetXRefStart()
    {
        // Look for the startxref element as per PDF Spec 7.5.5
        while (true)
        {
            var b = NextByte();
            switch ((char)b)
            {
                case '\r':
                    b = NextByte();
                    if (b != '\n')
                    {
                        PutBack();
                    }
                    goto case '\n';
                case '\n':
                    // Handle consecutive newlines
                    while (true)
                    {
                        b = NextByte();
                        if (b == '\r')
                        {
                            goto case '\r';
                        }
                        else if (b == '\n')
                        {
                            goto case '\n';
                        }
                        else if (b == ' ' || b == '\t' || b == '\f')
                        {
                            continue;
                        }
                        else
                        {
                            PutBack();
                            break;
                        }
                    }
                    var token = NextToken(true);
                    if (token.Type == TokenType.Keyword && (string)token.Value == "startxref")
                    {
                        token = NextToken();
                        if (token.Type == TokenType.Int)
                        {
                            return (long)token.Value;
                        }
                        throw new PdfMetadataExtractorException("Expected integer after startxref keyword");
                    }
                    continue;
                default:
                    continue;
            }
        }
    }
    /// 
    ///
    /// 
    /// 
    ///     0000000015 00000 n    ← offset=15, generation=0, in-use
    ///     0000000109 00000 n    ← offset=109, generation=0, in-use
    ///     0000000000 65535 f    ← offset=0, generation=65535, free
    /// 
    /// Cross-reference table entry as per PDF Spec 7.5.4
    /// 
    public bool NextXRefEntry(out long offset, out int generation)
    {
        WantLookahead(20);
        if (_valid - _pos < 20)
        {
            throw new PdfMetadataExtractorException("End of stream");
        }
        // Parse the 20-byte XRef entry: "nnnnnnnnnn ggggg n/f \r\n"
        offset = Convert.ToInt64(Encoding.ASCII.GetString(_buffer, _pos, 10).Trim());
        generation = Convert.ToInt32(Encoding.ASCII.GetString(_buffer, _pos + 11, 5).Trim());
        var inUse = _buffer[_pos + 17] == 'n';
        _pos += 20;
        return inUse;
    }
    public Stream StreamObject(int length, bool deflate)
    {
        // Read a stream object as per PDF Spec 7.3.8
        // At the moment, we only accept uncompressed streams or the FlateDecode (PDF Spec 7.4.1) filter
        // with no parameters. These cover the vast majority of streams we're interested in.
        var rawData = new MemoryStream();
        ExpectNewline();
        if (_pos < _valid)
        {
            var buffered = Math.Min(_valid - _pos, length);
            rawData.Write(_buffer, _pos, buffered);
            length -= buffered;
            _pos += buffered;
        }
        while (length > 0)
        {
            var buffered = Math.Min(length, BufferSize);
            stream.ReadExactly(_buffer, 0, buffered);
            rawData.Write(_buffer, 0, buffered);
            _pos = 0;
            _valid = 0;
            length -= buffered;
        }
        rawData.Seek(0, SeekOrigin.Begin);
        if (deflate)
        {
            return new ZLibStream(rawData, CompressionMode.Decompress, false);
        }
        else
        {
            return rawData;
        }
    }
    private byte NextByte()
    {
        if (_pos >= _valid)
        {
            _pos = 0;
            _valid = stream.Read(_buffer, 0, BufferSize);
            if (_valid <= 0)
            {
                throw new PdfMetadataExtractorException("End of stream");
            }
        }
        return _buffer[_pos++];
    }
    private byte LastByte()
    {
        return _buffer[_pos - 1];
    }
    private void PutBack()
    {
        --_pos;
    }
    private void ExpectByte(byte expected)
    {
        if (NextByte() != expected)
        {
            throw new PdfMetadataExtractorException($"Unexpected character, expected {expected}");
        }
    }
    private void WantLookahead(int length)
    {
        if (_pos + length > _valid)
        {
            Buffer.BlockCopy(_buffer, _pos, _buffer, 0, _valid - _pos);
            _valid -= _pos;
            _pos = 0;
            _valid += stream.Read(_buffer, _valid, BufferSize - _valid);
        }
    }
    private void SkipComment()
    {
        while (true)
        {
            var b = NextByte();
            if (b == '\n')
            {
                break;
            }
            else if (b == '\r')
            {
                if (NextByte() != '\n')
                {
                    PutBack();
                }
                break;
            }
        }
    }
    private Token ScanNumber()
    {
        StringBuilder sb = new();
        var hasDot = LastByte() == '.';
        bool followedBySpace;
        sb.Append((char)LastByte());
        while (true)
        {
            var b = NextByte();
            if (b == '.' || b >= '0' && b <= '9')
            {
                sb.Append((char)b);
                if (b == '.')
                {
                    hasDot = true;
                }
            }
            else
            {
                followedBySpace = (b == ' ' || b == '\t');
                PutBack();
                break;
            }
        }
        if (hasDot)
        {
            return new Token(TokenType.Double, double.Parse(sb.ToString()));
        }
        if (followedBySpace)
        {
            // Look ahead to see if it's an object reference (PDF Spec 7.3.10)
            WantLookahead(32);
            var savedPos = _pos;
            var b = NextByte();
            while (b == ' ' || b == '\t')
            {
                b = NextByte();
            }
            // Generation number (ignored)
            while (b >= '0' && b <= '9')
            {
                b = NextByte();
            }
            while (b == ' ' || b == '\t')
            {
                b = NextByte();
            }
            if (b == 'R')
            {
                return new Token(TokenType.ObjectRef, long.Parse(sb.ToString()));
            }
            else if (b == 'o' && NextByte() == 'b' && NextByte() == 'j')
            {
                return new Token(TokenType.ObjectStart, long.Parse(sb.ToString()));
            }
            else
            {
                _pos = savedPos;
            }
        }
        return new Token(TokenType.Int, long.Parse(sb.ToString()));
    }
    private static int HexDigit(byte b)
    {
        return (char) b switch
        {
            >= '0' and <= '9' => b - (byte) '0',
            >= 'a' and <= 'f' => b - (byte) 'a' + 10,
            >= 'A' and <= 'F' => b - (byte) 'A' + 10,
            _ => throw new PdfMetadataExtractorException("Invalid hex digit, got {b}")
        };
    }
    private Token ScanName()
    {
        // PDF Spec 7.3.5
        var sb = new StringBuilder();
        while (true)
        {
            var b = NextByte();
            switch ((char)b)
            {
                case '(':
                case ')':
                case '[':
                case ']':
                case '{':
                case '}':
                case '<':
                case '>':
                case '/':
                case '%':
                    PutBack();
                    goto case ' ';
                case ' ':
                case '\t':
                case '\n':
                case '\f':
                case '\r':
                    return new Token(TokenType.Name, sb.ToString());
                case '#':
                    var b1 = NextByte();
                    var b2 = NextByte();
                    b = (byte)((HexDigit(b1) << 4) | HexDigit(b2));
                    goto default;
                default:
                    sb.Append((char)b);
                    break;
            }
        }
    }
    private Token ScanString()
    {
        // PDF Spec 7.3.4.2
        PdfStringBuilder sb = new();
        var parenLevel = 1;
        while (true)
        {
            var b = NextByte();
            switch ((char)b)
            {
                case '(':
                    parenLevel++;
                    sb.AppendByte(b);
                    break;
                case ')':
                    if (--parenLevel == 0)
                    {
                        return new Token(TokenType.String, sb.ToString());
                    }
                    sb.AppendByte(b);
                    break;
                case '\\':
                    b = NextByte();
                    switch ((char)b)
                    {
                        case 'b':
                            sb.Append('\b');
                            break;
                        case 'f':
                            sb.Append('\f');
                            break;
                        case 'n':
                            sb.Append('\n');
                            break;
                        case 'r':
                            sb.Append('\r');
                            break;
                        case 't':
                            sb.Append('\t');
                            break;
                        case >= '0' and <= '7':
                            var b2 = NextByte();
                            var b3 = NextByte();
                            if (b2 < '0' || b2 > '7' || b3 < '0' || b3 > '7')
                            {
                                throw new PdfMetadataExtractorException("Invalid octal escape, got {b1}{b2}{b3}");
                            }
                            sb.AppendByte((byte)((b - '0') << 6 | (b2 - '0') << 3 | (b3 - '0')));
                            break;
                    }
                    break;
                default:
                    sb.AppendByte(b);
                    break;
            }
        }
    }
    private Token ScanHexString()
    {
        // PDF Spec 7.3.4.3
        PdfStringBuilder sb = new();
        while (true)
        {
            var b = NextByte();
            switch ((char)b)
            {
                case (>= '0' and <= '9') or (>= 'a' and <= 'f') or (>= 'A' and <= 'F'):
                    var b1 = NextByte();
                    if (b1 == '>')
                    {
                        PutBack();
                        b1 = (byte)'0';
                    }
                    sb.AppendByte((byte)(HexDigit(b) << 4 | HexDigit(b1)));
                    break;
                case '>':
                    return new Token(TokenType.String, sb.ToString());
                default:
                    throw new PdfMetadataExtractorException("Invalid hex string, got {b}");
            }
        }
    }
    private Token ScanKeyword()
    {
        StringBuilder sb = new();
        sb.Append((char)LastByte());
        while (true)
        {
            var b = NextByte();
            if ((b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z'))
            {
                sb.Append((char)b);
            }
            else
            {
                PutBack();
                break;
            }
        }
        return sb.ToString() switch
        {
            "true" => new Token(TokenType.Bool, true),
            "false" => new Token(TokenType.Bool, false),
            "stream" => new Token(TokenType.StreamStart, true),
            "endstream" => new Token(TokenType.StreamEnd, true),
            "endobj" => new Token(TokenType.ObjectEnd, true),
            _ => new Token(TokenType.Keyword, sb.ToString())
        };
    }
}
internal class PdfMetadataExtractor : IPdfMetadataExtractor
{
    private readonly ILogger _logger;
    private readonly PdfLexer _lexer;
    private readonly FileStream _stream;
    private readonly Dictionary _objectOffsets = [];
    private readonly Dictionary _metadata = [];
    private readonly Stack _metadataRef = new();
    private bool _disposed;
    private struct MetadataRef(long root, long info)
    {
        public long Root = root;
        public long Info = info;
    }
    private readonly struct XRefSection(long first, long count)
    {
        public readonly long First = first;
        public readonly long Count = count;
    }
    public PdfMetadataExtractor(ILogger logger, string filename)
    {
        _logger = logger;
        _stream = File.OpenRead(filename);
        _lexer = new PdfLexer(_stream);
        ReadObjectOffsets();
        ReadMetadata(filename);
    }
    public Dictionary GetMetadata()
    {
        return _metadata;
    }
#pragma warning disable S1144
    private void LogMetadata(string filename)
#pragma warning restore S1144
    {
       _logger.LogTrace("Metadata for {Path}:", filename);
        foreach (var entry in _metadata)
        {
            _logger.LogTrace("   {Key:0,-5} : {Value:1}", entry.Key, entry.Value);
        }
    }
    private void ReadObjectOffsets()
    {
        // Look for file trailer (PDF Spec 7.5.5)
        // Spec says trailer must be strictly at end of file.
        // Adobe software accepts trailer within last 1K of EOF,
        // but in practice, virtually all PDFs have trailer at end.
        _stream.Seek(-32, SeekOrigin.End);
        var xrefOffset = _lexer.GetXRefStart();
        ReadXRefAndTrailer(xrefOffset);
    }
    private void ReadXRefAndTrailer(long xrefOffset)
    {
        _stream.Seek(xrefOffset, SeekOrigin.Begin);
        _lexer.ResetBuffer();
        if (!_lexer.TestByte((byte)'x'))
        {
            // Cross-reference stream (PDF Spec 7.5.8)
            ReadXRefStream();
            return;
        }
        // Cross-reference table (PDF Spec 7.5.4)
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.Keyword || (string)token.Value != "xref")
        {
            throw new PdfMetadataExtractorException("Expected xref keyword");
        }
        while (true)
        {
            token = _lexer.NextToken();
            if (token.Type == PdfLexer.TokenType.Int)
            {
                var startObj = (long)token.Value;
                token = _lexer.NextToken();
                if (token.Type != PdfLexer.TokenType.Int)
                {
                    throw new PdfMetadataExtractorException("Expected number of objects in xref subsection");
                }
                var numObj = (long)token.Value;
                _lexer.ExpectNewline();
                for (var obj = startObj; obj < startObj + numObj; ++obj)
                {
                    var inUse = _lexer.NextXRefEntry(out var offset, out var generation);
                    if (inUse && offset > 0)
                    {
                        _objectOffsets[obj] = offset ;
                    }
                    // Free objects (inUse == false) are not stored in the dictionary
                }
            }
            else if (token.Type == PdfLexer.TokenType.Keyword && (string)token.Value == "trailer")
            {
                break;
            }
            else
            {
                throw new PdfMetadataExtractorException("Unexpected token in xref");
            }
        }
        ReadTrailerDictionary();
    }
    private void ReadXRefStream()
    {
        // Cross-reference stream (PDF Spec 7.5.8)
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.ObjectStart)
        {
            throw new PdfMetadataExtractorException("Expected obj keyword");
        }
        long length = -1;
        long size = -1;
        var deflate = false;
        long prev = -1;
        long typeWidth = -1;
        long offsetWidth = -1;
        long generationWidth = -1;
        Queue sections = new();
        var meta = new MetadataRef(-1, -1);
        // Cross-reference stream dictionary (PDF Spec 7.5.8.2)
        ParseDictionary(delegate(string key, PdfLexer.Token value) {
            switch (key)
            {
                case "Type":
                    if (value.Type != PdfLexer.TokenType.Name || (string)value.Value != "XRef")
                    {
                        throw new PdfMetadataExtractorException("Expected /Type to be /XRef");
                    }
                    return true;
                case "Length":
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected integer after /Length");
                    }
                    length = (long)value.Value;
                    return true;
                case "Size":
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected integer after /Size");
                    }
                    size = (long)value.Value;
                    return true;
                case "Prev":
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected offset after /Prev");
                    }
                    prev = (long)value.Value;
                    return true;
                case "Index":
                    if (value.Type != PdfLexer.TokenType.ArrayStart)
                    {
                        throw new PdfMetadataExtractorException("Expected array after /Index");
                    }
                    while (true)
                    {
                        token = _lexer.NextToken();
                        if (token.Type == PdfLexer.TokenType.ArrayEnd)
                        {
                            break;
                        }
                        else if (token.Type != PdfLexer.TokenType.Int)
                        {
                            throw new PdfMetadataExtractorException("Expected integer in /Index array");
                        }
                        var first = (long)token.Value;
                        token = _lexer.NextToken();
                        if (token.Type != PdfLexer.TokenType.Int)
                        {
                            throw new PdfMetadataExtractorException("Expected integer pair in /Index array");
                        }
                        var count = (long)token.Value;
                        sections.Enqueue(new XRefSection(first, count));
                    }
                    return true;
                case "W":
                    if (value.Type != PdfLexer.TokenType.ArrayStart)
                    {
                        throw new PdfMetadataExtractorException("Expected array after /W");
                    }
                    var widths = new long[3];
                    for (var i = 0; i < 3; ++i)
                    {
                        token = _lexer.NextToken();
                        if (token.Type != PdfLexer.TokenType.Int)
                        {
                            throw new PdfMetadataExtractorException("Expected integer in /W array");
                        }
                        widths[i] = (long)token.Value;
                    }
                    token = _lexer.NextToken();
                    if (token.Type != PdfLexer.TokenType.ArrayEnd)
                    {
                        throw new PdfMetadataExtractorException("Unclosed array after /W");
                    }
                    typeWidth = widths[0];
                    offsetWidth = widths[1];
                    generationWidth = widths[2];
                    return true;
                case "Filter":
                    if (value.Type != PdfLexer.TokenType.Name)
                    {
                        throw new PdfMetadataExtractorException("Expected name after /Filter");
                    }
                    if ((string)value.Value != "FlateDecode")
                    {
                        throw new PdfMetadataExtractorException("Unsupported filter, only FlateDecode is supported");
                    }
                    deflate = true;
                    return true;
                case "Root":
                    if (value.Type != PdfLexer.TokenType.ObjectRef)
                    {
                        throw new PdfMetadataExtractorException("Expected object reference after /Root");
                    }
                    meta.Root = (long)value.Value;
                    return true;
                case "Info":
                    if (value.Type != PdfLexer.TokenType.ObjectRef)
                    {
                        throw new PdfMetadataExtractorException("Expected object reference after /Info");
                    }
                    meta.Info = (long)value.Value;
                    return true;
                default:
                    return false;
            }
        });
        token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.StreamStart)
        {
            throw new PdfMetadataExtractorException("Expected xref stream after dictionary");
        }
        var stream = _lexer.StreamObject((int)length, deflate);
        if (sections.Count == 0)
        {
            sections.Enqueue(new XRefSection(0, size));
        }
        while (sections.Count > 0)
        {
            var section = sections.Dequeue();
            for (var i = section.First; i < section.First + section.Count; ++i)
            {
                long type = 0;
                long offset = 0;
                long generation = 0;
                if (typeWidth == 0)
                {
                    type = 1;
                }
                for (var j = 0; j < typeWidth; ++j)
                {
                    type = (type << 8) | (ushort)stream.ReadByte();
                }
                for (var j = 0; j < offsetWidth; ++j)
                {
                    offset = (offset << 8) | (ushort)stream.ReadByte();
                }
                for (var j = 0; j < generationWidth; ++j)
                {
                    generation = (generation << 8) | (ushort)stream.ReadByte();
                }
                if (type == 1)
                {
                    _objectOffsets.TryAdd(i, offset);
                }
            }
        }
        if (prev > -1)
        {
            ReadXRefAndTrailer(prev);
        }
        PushMetadataRef(meta);
    }
    private void PushMetadataRef(MetadataRef meta)
    {
        if (_metadataRef.Count > 0)
        {
            if (meta.Root == _metadataRef.Peek().Root)
            {
                meta.Root = -1;
            }
            if (meta.Info == _metadataRef.Peek().Info)
            {
                meta.Info = -1;
            }
        }
        if (meta.Root != -1 || meta.Info != -1)
        {
            _metadataRef.Push(meta);
        }
    }
    private void ReadTrailerDictionary()
    {
        // Read trailer directory (PDF Spec 7.5.5)
        long prev = -1;
        long xrefStm = -1;
        MetadataRef meta = new(-1, -1);
        ParseDictionary(delegate(string key, PdfLexer.Token value)
        {
            switch (key)
            {
                case "Root":
                    if (value.Type != PdfLexer.TokenType.ObjectRef)
                    {
                        throw new PdfMetadataExtractorException("Expected object reference after /Root");
                    }
                    meta.Root = (long)value.Value;
                    return true;
                case "Prev":
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected offset after /Prev");
                    }
                    prev = (long)value.Value;
                    return true;
                case "Info":
                    if (value.Type != PdfLexer.TokenType.ObjectRef)
                    {
                        throw new PdfMetadataExtractorException("Expected object reference after /Info");
                    }
                    meta.Info = (long)value.Value;
                    return true;
                case "XRefStm":
                    // Prefer encoded xref stream over xref table
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected offset after /XRefStm");
                    }
                    xrefStm = (long)value.Value;
                    return true;
                case "Encrypt":
                    throw new PdfMetadataExtractorException("Encryption not supported");
                default:
                    return false;
            }
        });
        PushMetadataRef(meta);
        if (xrefStm != -1)
        {
            ReadXRefAndTrailer(xrefStm);
        }
        if (prev != -1)
        {
            ReadXRefAndTrailer(prev);
        }
    }
    private void ReadMetadata(string filename)
    {
        // We read potential metadata sources in backwards historical order, so
        // we can overwrite to our heart's content
        while (_metadataRef.Count > 0)
        {
            var meta = _metadataRef.Pop();
            _logger.LogTrace("DocumentCatalog for {Path}: {Root}, Info: {Info}", filename, meta.Root, meta.Info);
            ReadMetadataFromInfo(meta.Info);
            ReadMetadataFromXml(MetadataObjInObjectCatalog(meta.Root));
        }
    }
    private void ReadMetadataFromInfo(long infoObj)
    {
        // Document information dictionary (PDF Spec 14.3.3)
        // We treat this as less authoritative than the Metadata stream.
        if (!HasObject(infoObj))
        {
            return;
        }
        _stream.Seek(_objectOffsets[infoObj], SeekOrigin.Begin);
        _lexer.ResetBuffer();
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.ObjectStart)
        {
            throw new PdfMetadataExtractorException("Expected object header");
        }
        Dictionary indirectObjects = [];
        ParseDictionary(delegate(string key, PdfLexer.Token value)
        {
            switch (key)
            {
                case "Title":
                case "Author":
                case "Subject":
                case "Keywords":
                case "Creator":
                case "Producer":
                case "CreationDate":
                case "ModDate":
                    if (value.Type == PdfLexer.TokenType.ObjectRef) {
                        indirectObjects[key] = (long)value.Value;
                    }
                    else if (value.Type != PdfLexer.TokenType.String)
                    {
                        throw new PdfMetadataExtractorException("Expected string value");
                    }
                    else
                    {
                        _metadata[key] = (string)value.Value;
                    }
                    return true;
                default:
                    return false;
            }
        });
        // Resolve indirectly referenced values
        foreach(var key in indirectObjects.Keys) {
            _stream.Seek(_objectOffsets[indirectObjects[key]], SeekOrigin.Begin);
            _lexer.ResetBuffer();
            token = _lexer.NextToken();
            if (token.Type != PdfLexer.TokenType.ObjectStart) {
                throw new PdfMetadataExtractorException("Expected object here");
            }
            token = _lexer.NextToken();
            if (token.Type != PdfLexer.TokenType.String) {
                throw new PdfMetadataExtractorException("Expected string");
            }
            _metadata[key] = (string) token.Value;
        }
    }
    private long MetadataObjInObjectCatalog(long rootObj)
    {
        // Look for /Metadata entry in document catalog (PDF Spec 7.7.2)
        if (!HasObject(rootObj))
        {
            return -1;
        }
        _stream.Seek(_objectOffsets[rootObj], SeekOrigin.Begin);
        _lexer.ResetBuffer();
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.ObjectStart)
        {
            throw new PdfMetadataExtractorException("Expected object header");
        }
        long meta = -1;
        ParseDictionary(delegate(string key, PdfLexer.Token value)
        {
            switch (key) {
                case "Metadata":
                    if (value.Type != PdfLexer.TokenType.ObjectRef)
                    {
                        throw new PdfMetadataExtractorException("Expected object number after /Metadata");
                    }
                    meta = (long)value.Value;
                    return true;
                default:
                    return false;
            }
        });
        return meta;
    }
    // Obtain metadata from XMP stream object
    // See XMP specification: https://developer.adobe.com/xmp/docs/XMPSpecifications/
    // and Dublin Core: https://www.dublincore.org/specifications/dublin-core/
    private static string? GetTextFromXmlNode(XmlDocument doc, XmlNamespaceManager ns, string path)
    {
        return (doc.DocumentElement?.SelectSingleNode(path + "//rdf:li", ns)
            ?? doc.DocumentElement?.SelectSingleNode(path, ns))?.InnerText;
    }
    private static string? GetListFromXmlNode(XmlDocument doc, XmlNamespaceManager ns, string path)
    {
        var nodes = doc.DocumentElement?.SelectNodes(path + "//rdf:li", ns);
        if (nodes == null) return null;
        var list = new StringBuilder();
        foreach (XmlNode n in nodes)
        {
            if (list.Length > 0)
            {
                list.Append(',');
            }
            list.Append(n.InnerText);
        }
        return list.Length > 0 ? list.ToString() : null;
    }
    private void SetMetadata(string key, string? value)
    {
        if (value == null) return;
        _metadata[key] = value;
    }
    private void ReadMetadataFromXml(long meta)
    {
        if (!HasObject(meta)) return;
        _stream.Seek(_objectOffsets[meta], SeekOrigin.Begin);
        _lexer.ResetBuffer();
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.ObjectStart)
        {
            throw new PdfMetadataExtractorException("Expected object header");
        }
        long length = -1;
        var deflate = false;
        // Metadata stream dictionary (PDF Spec 14.3.2)
        ParseDictionary(delegate(string key, PdfLexer.Token value)
        {
            switch (key) {
                case "Type":
                    if (value.Type != PdfLexer.TokenType.Name || (string)value.Value != "Metadata")
                    {
                        throw new PdfMetadataExtractorException("Expected /Type to be /Metadata");
                    }
                    return true;
                case "Subtype":
                    if (value.Type != PdfLexer.TokenType.Name || (string)value.Value != "XML")
                    {
                        throw new PdfMetadataExtractorException("Expected /Subtype to be /XML");
                    }
                    return true;
                case "Length":
                    if (value.Type != PdfLexer.TokenType.Int)
                    {
                        throw new PdfMetadataExtractorException("Expected integer after /Length");
                    }
                    length = (long)value.Value;
                    return true;
                case "Filter":
                    if (value.Type != PdfLexer.TokenType.Name)
                    {
                        throw new PdfMetadataExtractorException("Expected name after /Filter");
                    }
                    if ((string)value.Value != "FlateDecode")
                    {
                        throw new PdfMetadataExtractorException("Unsupported filter, only FlateDecode is supported");
                    }
                    deflate = true;
                    return true;
                default:
                    return false;
            }
        });
        token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.StreamStart)
        {
            throw new PdfMetadataExtractorException("Expected xref stream after dictionary");
        }
        var xmlStream = _lexer.StreamObject((int)length, deflate);
        // Skip XMP header
        while (true) {
            var b = xmlStream.ReadByte();
            if (b < 0) {
                throw new PdfMetadataExtractorException("Reached EOF in XMP header");
            }
            if (b == '?') {
                while (b == '?') {
                    b = xmlStream.ReadByte();
                }
                if (b == '>') {
                    break;
                }
            }
        }
        var metaDoc = new XmlDocument();
        metaDoc.Load(xmlStream);
        var ns = new XmlNamespaceManager(metaDoc.NameTable);
        ns.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
        ns.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
        ns.AddNamespace("calibreSI", "http://calibre-ebook.com/xmp-namespace-series-index");
        ns.AddNamespace("calibre", "http://calibre-ebook.com/xmp-namespace");
        ns.AddNamespace("pdfx", "http://ns.adobe.com/pdfx/1.3/");
        ns.AddNamespace("prism", "http://prismstandard.org/namespaces/basic/2.0/");
        ns.AddNamespace("xmp", "http://ns.adobe.com/xap/1.0/");
        SetMetadata("CreationDate",
            GetTextFromXmlNode(metaDoc, ns, "//dc:date")
         ?? GetTextFromXmlNode(metaDoc, ns, "//xmp:CreateDate"));
        SetMetadata("Summary", GetTextFromXmlNode(metaDoc, ns, "//dc:description"));
        SetMetadata("Publisher", GetTextFromXmlNode(metaDoc, ns, "//dc:publisher"));
        SetMetadata("Author", GetListFromXmlNode(metaDoc, ns, "//dc:creator"));
        SetMetadata("Title", GetTextFromXmlNode(metaDoc, ns, "//dc:title"));
        SetMetadata("Subject", GetListFromXmlNode(metaDoc, ns, "//dc:subject"));
        SetMetadata("Language", GetTextFromXmlNode(metaDoc, ns, "//dc:language"));
        SetMetadata("ISBN", GetTextFromXmlNode(metaDoc, ns, "//pdfx:isbn") ?? GetTextFromXmlNode(metaDoc, ns, "//prism:isbn"));
        SetMetadata("UserRating", GetTextFromXmlNode(metaDoc, ns, "//calibre:rating"));
        SetMetadata("TitleSort", GetTextFromXmlNode(metaDoc, ns, "//calibre:title_sort"));
        SetMetadata("Series", GetTextFromXmlNode(metaDoc, ns, "//calibre:series/rdf:value"));
        SetMetadata("Volume", GetTextFromXmlNode(metaDoc, ns, "//calibreSI:series_index"));
    }
    private delegate bool DictionaryHandler(string key, PdfLexer.Token value);
    private void ParseDictionary(DictionaryHandler handler)
    {
        var token = _lexer.NextToken();
        if (token.Type != PdfLexer.TokenType.DictionaryStart)
        {
            throw new PdfMetadataExtractorException("Expected dictionary");
        }
        while (true)
        {
            token = _lexer.NextToken();
            if (token.Type == PdfLexer.TokenType.DictionaryEnd)
            {
                return;
            }
            if (token.Type == PdfLexer.TokenType.Name)
            {
                var value = _lexer.NextToken();
                if (!handler((string)token.Value, value)) {
                    SkipValue(value);
                }
            }
            else
            {
                throw new PdfMetadataExtractorException("Improper token in dictionary");
            }
        }
    }
    private void SkipValue(PdfLexer.Token? existingToken = null)
    {
        var token = existingToken ?? _lexer.NextToken();
        switch (token.Type)
        {
            case PdfLexer.TokenType.Bool:
            case PdfLexer.TokenType.Int:
            case PdfLexer.TokenType.Double:
            case PdfLexer.TokenType.Name:
            case PdfLexer.TokenType.String:
            case PdfLexer.TokenType.ObjectRef:
                break;
            case PdfLexer.TokenType.ArrayStart:
            {
                SkipArray();
                break;
            }
            case PdfLexer.TokenType.DictionaryStart:
            {
                SkipDictionary();
                break;
            }
            default:
                throw new PdfMetadataExtractorException("Unexpected token in SkipValue");
        }
    }
    private void SkipArray()
    {
        while (true)
        {
            var token = _lexer.NextToken();
            if (token.Type == PdfLexer.TokenType.ArrayEnd)
            {
                break;
            }
            SkipValue(token);
        }
    }
    private void SkipDictionary()
    {
        while (true)
        {
            var token = _lexer.NextToken();
            if (token.Type == PdfLexer.TokenType.DictionaryEnd)
            {
                break;
            }
            if (token.Type != PdfLexer.TokenType.Name)
            {
                throw new PdfMetadataExtractorException("Expected name in dictionary");
            }
            SkipValue();
        }
    }
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }
    protected virtual void Dispose(bool disposing)
    {
        if (_disposed || !disposing) return;
        _stream.Dispose();
        _disposed = true;
    }
    private bool HasObject(long objNum)
    {
        return _objectOffsets.ContainsKey(objNum) && _objectOffsets[objNum] > 0;
    }
    private long GetObjectOffset(long objNum)
    {
        return _objectOffsets.TryGetValue(objNum, out var offset) ? offset : 0;
    }
}