Skip to content
Snippets Groups Projects
bert.jsonnet 1.83 KiB
{
  "dataset_reader": {
    "type": "conll2003",
    "tag_label": "ner",
    "coding_scheme": "BIOUL",
    "token_indexers": {
      "bert": {
          "type": "bert-pretrained",
          "pretrained_model": std.extVar("BERT_VOCAB"),
          "do_lowercase": false,
          "use_starting_offsets": true
      },
      "token_characters": {
        "type": "characters",
        "min_padding_length": 3
      },
    }
  },
  "train_data_path": std.extVar("TRAIN_DATA_PATH"),
  "validation_data_path": std.extVar("TEST_A_PATH"),
  "model": {
    "type": "simple_tagger",
    "text_field_embedder": {
        "allow_unmatched_keys": true,
        "embedder_to_indexer_map": {
            "bert": ["bert", "bert-offsets"],
            "token_characters": ["token_characters"]
        },
        "token_embedders": {
            "bert": {
                "type": "bert-pretrained",
                "pretrained_model": std.extVar("BERT_WEIGHTS")
            },
            "token_characters": {
                "type": "character_encoding",
                "embedding": {
                    "embedding_dim": 16
                },
                "encoder": {
                    "type": "cnn",
                    "embedding_dim": 16,
                    "num_filters": 128,
                    "ngram_filter_sizes": [3],
                    "conv_layer_activation": "relu"
                }
            }
        }
    },
    "encoder": {
        "type": "lstm",
        "input_size": 896,
        "hidden_size": 100,
        "num_layers": 2,
        "dropout": 0.5,
        "bidirectional": true
    }
  },
  "iterator": {
    "type": "basic",
    "batch_size": 2
  },
  "trainer": {
    "optimizer": {
        "type": "bert_adam",
        "lr": 0.001
    },
    "num_serialized_models_to_keep": 3,
    "num_epochs": 4,
    "grad_norm": 5.0,
    "cuda_device": 0
  }
}